diff --git a/htmlparser/.cvsignore b/htmlparser/.cvsignore deleted file mode 100644 index f3c7a7c5da68..000000000000 --- a/htmlparser/.cvsignore +++ /dev/null @@ -1 +0,0 @@ -Makefile diff --git a/htmlparser/public/.cvsignore b/htmlparser/public/.cvsignore deleted file mode 100644 index f3c7a7c5da68..000000000000 --- a/htmlparser/public/.cvsignore +++ /dev/null @@ -1 +0,0 @@ -Makefile diff --git a/htmlparser/public/nsHTMLTagList.h b/htmlparser/public/nsHTMLTagList.h deleted file mode 100644 index 5d366461666c..000000000000 --- a/htmlparser/public/nsHTMLTagList.h +++ /dev/null @@ -1,193 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1999 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -/****** - - This file contains the list of all HTML tags - See nsHTMLTags.h for access to the enum values for tags - - It is designed to be used as inline input to nsHTMLTags.cpp and - nsHTMLContentSink *only* through the magic of C preprocessing. - - All entires must be enclosed in the macro HTML_TAG which will have cruel - and unusual things done to it - - It is recommended (but not strictly necessary) to keep all entries - in alphabetical order - - The first argument to HTML_TAG is both the enum identifier of the - property and the string value. The second argument is the "creator" - method of the form NS_New$TAGNAMEElement, that will be used by - nsHTMLContentSink.cpp to create a content object for a tag of that - type. Use NOTUSED, if the particular tag has a non-standard creator. - - The HTML_OTHER macro is for values in the nsHTMLTag enum that are - not strictly tags. - - Entries *must* use only lowercase characters. - - ** Break these invarient and bad things will happen. 
** - - ******/ -HTML_TAG(a, Anchor) -HTML_TAG(abbr, Span) -HTML_TAG(acronym, Span) -HTML_TAG(address, Span) -HTML_TAG(applet, Applet) -HTML_TAG(area, Area) -HTML_TAG(b, Span) -HTML_TAG(base, Shared) -HTML_TAG(basefont, Shared) -HTML_TAG(bdo, Span) -HTML_TAG(bgsound, Span) -HTML_TAG(big, Span) -HTML_TAG(blink, Span) -HTML_TAG(blockquote, Shared) -HTML_TAG(body, Body) -HTML_TAG(br, BR) -HTML_TAG(button, Button) -HTML_TAG(caption, TableCaption) -HTML_TAG(center, Span) -HTML_TAG(cite, Span) -HTML_TAG(code, Span) -HTML_TAG(col, TableCol) -HTML_TAG(colgroup, TableCol) -HTML_TAG(counter, Span) -HTML_TAG(dd, Span) -HTML_TAG(del, Mod) -HTML_TAG(dfn, Span) -HTML_TAG(dir, Shared) -HTML_TAG(div, Div) -HTML_TAG(dl, SharedList) -HTML_TAG(dt, Span) -HTML_TAG(em, Span) -HTML_TAG(embed, Shared) -HTML_TAG(endnote, Span) -HTML_TAG(fieldset, FieldSet) -HTML_TAG(font, Font) -HTML_TAG(form, NOTUSED) -HTML_TAG(frame, Frame) -HTML_TAG(frameset, FrameSet) -HTML_TAG(h1, Heading) -HTML_TAG(h2, Heading) -HTML_TAG(h3, Heading) -HTML_TAG(h4, Heading) -HTML_TAG(h5, Heading) -HTML_TAG(h6, Heading) -HTML_TAG(head, Head) -HTML_TAG(hr, HR) -HTML_TAG(html, Html) -HTML_TAG(i, Span) -HTML_TAG(iframe, IFrame) -HTML_TAG(image, Span) -HTML_TAG(img, Image) -HTML_TAG(input, NOTUSED) -HTML_TAG(ins, Mod) -HTML_TAG(isindex, Shared) -HTML_TAG(kbd, Span) -HTML_TAG(keygen, Span) -HTML_TAG(label, Label) -HTML_TAG(legend, Legend) -HTML_TAG(li, LI) -HTML_TAG(link, Link) -HTML_TAG(listing, Span) -HTML_TAG(map, Map) -HTML_TAG(marquee, Div) -HTML_TAG(menu, Shared) -HTML_TAG(meta, Meta) -HTML_TAG(multicol, Span) -HTML_TAG(nobr, Span) -HTML_TAG(noembed, Div) -HTML_TAG(noframes, Div) -HTML_TAG(noscript, Div) -HTML_TAG(object, Object) -HTML_TAG(ol, SharedList) -HTML_TAG(optgroup, OptGroup) -HTML_TAG(option, Option) -HTML_TAG(p, Paragraph) -HTML_TAG(param, Shared) -HTML_TAG(parsererror, Div) -HTML_TAG(plaintext, Span) -HTML_TAG(pre, Pre) -HTML_TAG(q, Shared) -HTML_TAG(s, Span) -HTML_TAG(samp, Span) -HTML_TAG(script, Script) 
-HTML_TAG(select, NOTUSED) -HTML_TAG(server, Span) -HTML_TAG(small, Span) -HTML_TAG(sound, Span) -HTML_TAG(sourcetext, Div) -HTML_TAG(spacer, Shared) -HTML_TAG(span, Span) -HTML_TAG(strike, Span) -HTML_TAG(strong, Span) -HTML_TAG(style, Style) -HTML_TAG(sub, Span) -HTML_TAG(sup, Span) -HTML_TAG(table, Table) -HTML_TAG(tbody, TableSection) -HTML_TAG(td, TableCell) -HTML_TAG(textarea, TextArea) -HTML_TAG(tfoot, TableSection) -HTML_TAG(th, TableCell) -HTML_TAG(thead, TableSection) -HTML_TAG(title, Title) -HTML_TAG(tr, TableRow) -HTML_TAG(tt, Span) -HTML_TAG(u, Span) -HTML_TAG(ul, SharedList) -HTML_TAG(var, Span) -HTML_TAG(wbr, Shared) -HTML_TAG(xmp, Span) - - -/* These are not for tags. But they will be included in the nsHTMLTag - enum anyway */ - -/* XXX: The second parameters in some of the following entries look - like they are just wrong. They should really be NOTUSED. For now, - I'm just emulating what nsHTMLContentSink has done all along. -*/ -HTML_OTHER(text, Span) -HTML_OTHER(whitespace, Span) -HTML_OTHER(newline, Span) -HTML_OTHER(comment, Span) -HTML_OTHER(entity, Span) -HTML_OTHER(doctypeDecl, Span) -HTML_OTHER(markupDecl, Span) -HTML_OTHER(instruction, Span) diff --git a/htmlparser/public/nsHTMLTags.h b/htmlparser/public/nsHTMLTags.h deleted file mode 100644 index f9243468948c..000000000000 --- a/htmlparser/public/nsHTMLTags.h +++ /dev/null @@ -1,82 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef nsHTMLTags_h___ -#define nsHTMLTags_h___ - -#include "nsAString.h" - -class nsIAtom; - -/* - Declare the enum list using the magic of preprocessing - enum values are "eHTMLTag_foo" (where foo is the tag) - - To change the list of tags, see nsHTMLTagList.h - - */ -#define HTML_TAG(_tag, _classname) eHTMLTag_##_tag, -#define HTML_OTHER(_tag, _classname) eHTMLTag_##_tag, -enum nsHTMLTag { - /* this enum must be first and must be zero */ - eHTMLTag_unknown = 0, -#include "nsHTMLTagList.h" - - /* can't be moved into nsHTMLTagList since gcc3.4 doesn't like a - comma at the end of enum list*/ - eHTMLTag_userdefined -}; -#undef HTML_TAG -#undef HTML_OTHER - -// Currently there are 110 HTML tags. 
eHTMLTag_text = 112. -#define NS_HTML_TAG_MAX PRInt32(eHTMLTag_text - 1) - -class nsHTMLTags { -public: - static nsresult AddRefTable(void); - static void ReleaseTable(void); - - static nsHTMLTag LookupTag(const nsAString& aTagName); - static nsHTMLTag CaseSensitiveLookupTag(const PRUnichar* aTagName); - static const PRUnichar *GetStringValue(nsHTMLTag aEnum); - static nsIAtom *GetAtom(nsHTMLTag aEnum); -}; - -#define eHTMLTags nsHTMLTag - -#endif /* nsHTMLTags_h___ */ diff --git a/htmlparser/public/nsHTMLTokens.h b/htmlparser/public/nsHTMLTokens.h deleted file mode 100644 index ae8b75263beb..000000000000 --- a/htmlparser/public/nsHTMLTokens.h +++ /dev/null @@ -1,512 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -/** - * MODULE NOTES: - * @update gess 4/1/98 - * - * This file contains the declarations for all the HTML specific token types that - * our DTD's understand. In fact, the same set of token types are used for XML. - * Currently we have tokens for text, comments, start and end tags, entities, - * attributes, style, script and skipped content. Whitespace and newlines also - * have their own token types, but don't count on them to stay forever. - * - * If you're looking for the html tags, they're in a file called nsHTMLTag.h/cpp. - * - * Most of the token types have a similar API. They have methods to get the type - * of token (GetTokenType); those that represent HTML tags also have a method to - * get type tag type (GetTypeID). In addition, most have a method that causes the - * token to help in the parsing process called (Consume). We've also thrown in a - * few standard debugging methods as well. - */ - -#ifndef HTMLTOKENS_H -#define HTMLTOKENS_H - -#include "nsToken.h" -#include "nsHTMLTags.h" -#include "nsParserError.h" -#include "nsString.h" -#include "nsScannerString.h" - -class nsScanner; - - /******************************************************************* - * This enum defines the set of token types that we currently support. 
- *******************************************************************/ - -enum eHTMLTokenTypes { - eToken_unknown=0, - eToken_start=1, eToken_end, eToken_comment, eToken_entity, - eToken_whitespace, eToken_newline, eToken_text, eToken_attribute, - eToken_script, eToken_style, eToken_skippedcontent, eToken_instruction, - eToken_cdatasection, eToken_error, eToken_doctypeDecl, eToken_markupDecl, - eToken_last //make sure this stays the last token... -}; - -enum eHTMLCategory { - eHTMLCategory_unknown=0, - eHTMLCategory_inline, - eHTMLCategory_block, - eHTMLCategory_blockAndInline, - eHTMLCategory_list, - eHTMLCategory_table, - eHTMLCategory_tablepart, - eHTMLCategory_tablerow, - eHTMLCategory_tabledata, - eHTMLCategory_head, - eHTMLCategory_html, - eHTMLCategory_body, - eHTMLCategory_form, - eHTMLCategory_options, - eHTMLCategory_frameset, - eHTMLCategory_text -}; - - -nsresult ConsumeQuotedString(PRUnichar aChar,nsString& aString,nsScanner& aScanner); -nsresult ConsumeAttributeText(PRUnichar aChar,nsString& aString,nsScanner& aScanner); -const PRUnichar* GetTagName(PRInt32 aTag); -//PRInt32 FindEntityIndex(nsString& aString,PRInt32 aCount=-1); - - - -/** - * This declares the basic token type used in the HTML DTD's. - * @update gess 3/25/98 - */ -class CHTMLToken : public CToken { -public: - virtual ~CHTMLToken(); - CHTMLToken(eHTMLTags aTag); - - virtual eContainerInfo GetContainerInfo(void) const {return eFormUnknown;} - virtual void SetContainerInfo(eContainerInfo aInfo) { } - -protected: -}; - -/** - * This declares start tokens, which always take the form . - * This class also knows how to consume related attributes. 
- * - * @update gess 3/25/98 - */ -class CStartToken: public CHTMLToken { - CTOKEN_IMPL_SIZEOF - -public: - CStartToken(eHTMLTags aTag=eHTMLTag_unknown); - CStartToken(const nsAString& aString); - CStartToken(const nsAString& aName,eHTMLTags aTag); - - virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); - virtual PRInt32 GetTypeID(void); - virtual PRInt32 GetTokenType(void); - - virtual PRBool IsEmpty(void); - virtual void SetEmpty(PRBool aValue); - - virtual const nsAString& GetStringValue(); - virtual void GetSource(nsString& anOutputString); - virtual void AppendSourceTo(nsAString& anOutputString); - - // the following info is used to set well-formedness state on start tags... - virtual eContainerInfo GetContainerInfo(void) const {return mContainerInfo;} - virtual void SetContainerInfo(eContainerInfo aContainerInfo) { - mContainerInfo=aContainerInfo; - } - virtual PRBool IsWellFormed(void) const { - return eWellFormed == mContainerInfo; - } - - nsString mTextValue; - nsString mTrailingContent; -protected: - eContainerInfo mContainerInfo; - PRPackedBool mEmpty; -#ifdef DEBUG - PRPackedBool mAttributed; -#endif -}; - - -/** - * This declares end tokens, which always take the - * form . This class also knows how to consume - * related attributes. - * - * @update gess 3/25/98 - */ -class CEndToken: public CHTMLToken { - CTOKEN_IMPL_SIZEOF - -public: - CEndToken(eHTMLTags aTag); - CEndToken(const nsAString& aString); - CEndToken(const nsAString& aName,eHTMLTags aTag); - virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); - virtual PRInt32 GetTypeID(void); - virtual PRInt32 GetTokenType(void); - - virtual const nsAString& GetStringValue(); - virtual void GetSource(nsString& anOutputString); - virtual void AppendSourceTo(nsAString& anOutputString); - -protected: - nsString mTextValue; -}; - - -/** - * This declares comment tokens. 
Comments are usually - * thought of as tokens, but we treat them that way - * here so that the parser can have a consistent view - * of all tokens. - * - * @update gess 3/25/98 - */ -class CCommentToken: public CHTMLToken { - CTOKEN_IMPL_SIZEOF - -public: - CCommentToken(); - CCommentToken(const nsAString& aString); - virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); - virtual PRInt32 GetTokenType(void); - virtual const nsAString& GetStringValue(void); - virtual void AppendSourceTo(nsAString& anOutputString); - - nsresult ConsumeStrictComment(nsScanner& aScanner); - nsresult ConsumeQuirksComment(nsScanner& aScanner); - -protected: - nsScannerSubstring mComment; // does not include MDO & MDC - nsScannerSubstring mCommentDecl; // includes MDO & MDC -}; - - -/** - * This class declares entity tokens, which always take - * the form &xxxx;. This class also offers a few utility - * methods that allow you to easily reduce entities. - * - * @update gess 3/25/98 - */ -class CEntityToken : public CHTMLToken { - CTOKEN_IMPL_SIZEOF - -public: - CEntityToken(); - CEntityToken(const nsAString& aString); - virtual PRInt32 GetTokenType(void); - PRInt32 TranslateToUnicodeStr(nsString& aString); - virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); - static nsresult ConsumeEntity(PRUnichar aChar, nsString& aString, - nsScanner& aScanner); - static PRInt32 TranslateToUnicodeStr(PRInt32 aValue,nsString& aString); - - virtual const nsAString& GetStringValue(void); - virtual void GetSource(nsString& anOutputString); - virtual void AppendSourceTo(nsAString& anOutputString); - -protected: - nsString mTextValue; -}; - - -/** - * Whitespace tokens are used where whitespace can be - * detected as distinct from text. This allows us to - * easily skip leading/trailing whitespace when desired. 
- * - * @update gess 3/25/98 - */ -class CWhitespaceToken: public CHTMLToken { - CTOKEN_IMPL_SIZEOF - -public: - CWhitespaceToken(); - CWhitespaceToken(const nsAString& aString); - virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); - virtual PRInt32 GetTokenType(void); - virtual const nsAString& GetStringValue(void); - -protected: - nsString mTextValue; -}; - -/** - * Text tokens contain the normalized form of html text. - * These tokens are guaranteed not to contain entities, - * start or end tags, or newlines. - * - * @update gess 3/25/98 - */ -class CTextToken: public CHTMLToken { - CTOKEN_IMPL_SIZEOF - -public: - CTextToken(); - CTextToken(const nsAString& aString); - virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); - nsresult ConsumeUntil(PRUnichar aChar, PRBool aIgnoreComments, - nsScanner& aScanner, nsString& aEndTagName, - PRInt32 aFlag, PRBool& aFlushTokens); - virtual PRInt32 GetTokenType(void); - virtual PRInt32 GetTextLength(void); - virtual void CopyTo(nsAString& aStr); - virtual const nsAString& GetStringValue(void); - virtual void Bind(nsScanner* aScanner, nsScannerIterator& aStart, - nsScannerIterator& aEnd); - virtual void Bind(const nsAString& aStr); - -protected: - nsScannerSubstring mTextValue; -}; - - -/** - * CDATASection tokens contain raw unescaped text content delimited by - * a ![CDATA[ and ]]. 
- * XXX Not really a HTML construct - maybe we need a separation - * - * @update vidur 11/12/98 - */ -class CCDATASectionToken : public CHTMLToken { - CTOKEN_IMPL_SIZEOF - -public: - CCDATASectionToken(eHTMLTags aTag = eHTMLTag_unknown); - CCDATASectionToken(const nsAString& aString); - virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); - virtual PRInt32 GetTokenType(void); - virtual const nsAString& GetStringValue(void); - -protected: - nsString mTextValue; -}; - - -/** - * Declaration tokens contain raw unescaped text content (not really, but - * right now we use this only for view source). - * XXX Not really a HTML construct - maybe we need a separation - * - */ -class CMarkupDeclToken : public CHTMLToken { - CTOKEN_IMPL_SIZEOF - -public: - CMarkupDeclToken(); - CMarkupDeclToken(const nsAString& aString); - virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); - virtual PRInt32 GetTokenType(void); - virtual const nsAString& GetStringValue(void); - -protected: - nsScannerSubstring mTextValue; -}; - - -/** - * Attribute tokens are used to contain attribute key/value - * pairs whereever they may occur. Typically, they should - * occur only in start tokens. However, we may expand that - * ability when XML tokens become commonplace. 
- * - * @update gess 3/25/98 - */ -class CAttributeToken: public CHTMLToken { - CTOKEN_IMPL_SIZEOF - -public: - CAttributeToken(); - CAttributeToken(const nsAString& aString); - CAttributeToken(const nsAString& aKey, const nsAString& aString); - ~CAttributeToken() {} - virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); - virtual PRInt32 GetTokenType(void); - virtual const nsAString& GetKey(void); // XXX {return mTextKey;} - virtual void SetKey(const nsAString& aKey); - virtual void BindKey(nsScanner* aScanner, nsScannerIterator& aStart, - nsScannerIterator& aEnd); - virtual const nsAString& GetValue(void) {return mTextValue;} - virtual void SanitizeKey(); - virtual const nsAString& GetStringValue(void); - virtual void GetSource(nsString& anOutputString); - virtual void AppendSourceTo(nsAString& anOutputString); - - PRPackedBool mHasEqualWithoutValue; -protected: -#ifdef DEBUG - PRPackedBool mLastAttribute; -#endif - nsAutoString mTextValue; - nsScannerSubstring mTextKey; -}; - - -/** - * Newline tokens contain, you guessed it, newlines. - * They consume newline (CR/LF) either alone or in pairs. - * - * @update gess 3/25/98 - */ -class CNewlineToken: public CHTMLToken { - CTOKEN_IMPL_SIZEOF - -public: - CNewlineToken(); - virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); - virtual PRInt32 GetTokenType(void); - virtual const nsAString& GetStringValue(void); - - static void AllocNewline(); - static void FreeNewline(); -}; - - -/** - * Script tokens contain sequences of javascript (or, gulp, - * any other script you care to send). We don't tokenize - * it here, nor validate it. We just wrap it up, and pass - * it along to the html parser, who sends it (later on) - * to the scripting engine. 
- * - * @update gess 3/25/98 - */ -class CScriptToken: public CHTMLToken { - CTOKEN_IMPL_SIZEOF - -public: - CScriptToken(); - CScriptToken(const nsAString& aString); - virtual PRInt32 GetTokenType(void); - virtual const nsAString& GetStringValue(void); - -protected: - nsString mTextValue; -}; - - -/** - * Style tokens contain sequences of css style. We don't - * tokenize it here, nor validate it. We just wrap it up, - * and pass it along to the html parser, who sends it - * (later on) to the style engine. - * - * @update gess 3/25/98 - */ -class CStyleToken: public CHTMLToken { - CTOKEN_IMPL_SIZEOF - -public: - CStyleToken(); - CStyleToken(const nsAString& aString); - virtual PRInt32 GetTokenType(void); - virtual const nsAString& GetStringValue(void); - -protected: - nsString mTextValue; -}; - - -/** - * Whitespace tokens are used where whitespace can be - * detected as distinct from text. This allows us to - * easily skip leading/trailing whitespace when desired. - * - * @update gess 3/25/98 - */ -class CInstructionToken: public CHTMLToken { - CTOKEN_IMPL_SIZEOF - -public: - CInstructionToken(); - CInstructionToken(const nsAString& aString); - virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); - virtual PRInt32 GetTokenType(void); - virtual const nsAString& GetStringValue(void); - -protected: - nsString mTextValue; -}; - -class CErrorToken : public CHTMLToken { - CTOKEN_IMPL_SIZEOF - -public: - CErrorToken(nsParserError* aError=0); - ~CErrorToken(); - virtual PRInt32 GetTokenType(void); - - void SetError(nsParserError* aError); // CErrorToken takes ownership of aError - - // The nsParserError object returned by GetError is still owned by CErrorToken. - // DO NOT use the delete operator on it. Should we change this so that a copy - // of nsParserError is returned which needs to be destroyed by the consumer? 
- const nsParserError* GetError(void); - - virtual const nsAString& GetStringValue(void); -protected: - nsString mTextValue; - nsParserError* mError; -}; - -/** - * This token is generated by the HTML and Expat tokenizers - * when they see the doctype declaration ("") - * - */ - -class CDoctypeDeclToken: public CHTMLToken { - CTOKEN_IMPL_SIZEOF - -public: - CDoctypeDeclToken(eHTMLTags aTag=eHTMLTag_unknown); - CDoctypeDeclToken(const nsAString& aString,eHTMLTags aTag=eHTMLTag_unknown); - virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); - virtual PRInt32 GetTokenType(void); - virtual const nsAString& GetStringValue(void); - virtual void SetStringValue(const nsAString& aStr); - -protected: - nsString mTextValue; -}; - -#endif diff --git a/htmlparser/public/nsIContentSink.h b/htmlparser/public/nsIContentSink.h deleted file mode 100644 index 0a8b9d7364fd..000000000000 --- a/htmlparser/public/nsIContentSink.h +++ /dev/null @@ -1,124 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -#ifndef nsIContentSink_h___ -#define nsIContentSink_h___ - -/** - * MODULE NOTES: - * @update gess 4/1/98 - * - * This pure virtual interface is used as the "glue" that connects the parsing - * process to the content model construction process. - * - * The icontentsink interface is a very lightweight wrapper that represents the - * content-sink model building process. There is another one that you may care - * about more, which is the IHTMLContentSink interface. (See that file for details). - */ -#include "nsISupports.h" -#include "nsString.h" - -class nsIParser; - -#define NS_ICONTENT_SINK_IID \ -{ 0xa6cf9052, 0x15b3, 0x11d2,{0x93, 0x2e, 0x00, 0x80, 0x5f, 0x8a, 0xdd, 0x32}} - -// The base value for the content ID counter. -// Values greater than or equal to this base value are used -// by each of the content sinks to assign unique values -// to the content objects created by them. 
-#define NS_CONTENT_ID_COUNTER_BASE 10000 - -class nsIContentSink : public nsISupports { -public: - - NS_DEFINE_STATIC_IID_ACCESSOR(NS_ICONTENT_SINK_IID) - - /** - * This method gets called when the parser begins the process - * of building the content model via the content sink. - * - * @update 5/7/98 gess - */ - NS_IMETHOD WillBuildModel(void)=0; - - /** - * This method gets called when the parser concludes the process - * of building the content model via the content sink. - * - * @update 5/7/98 gess - */ - NS_IMETHOD DidBuildModel()=0; - - /** - * This method gets called when the parser gets i/o blocked, - * and wants to notify the sink that it may be a while before - * more data is available. - * - * @update 5/7/98 gess - */ - NS_IMETHOD WillInterrupt(void)=0; - - /** - * This method gets called when the parser i/o gets unblocked, - * and we're about to start dumping content again to the sink. - * - * @update 5/7/98 gess - */ - NS_IMETHOD WillResume(void)=0; - - /** - * This method gets called by the parser so that the content - * sink can retain a reference to the parser. The expectation - * is that the content sink will drop the reference when it - * gets the DidBuildModel notification i.e. when parsing is done. - */ - NS_IMETHOD SetParser(nsIParser* aParser)=0; - - /** - * Flush all pending notifications so that the content model - * is in sync with the state of the sink. - */ - NS_IMETHOD FlushPendingNotifications()=0; - - /** - * Set the document character set. This should be passed on to the - * document itself. 
- */ - NS_IMETHOD SetDocumentCharset(nsACString& aCharset)=0; -}; - -#endif /* nsIContentSink_h___ */ diff --git a/htmlparser/public/nsIDTD.h b/htmlparser/public/nsIDTD.h deleted file mode 100644 index 470e1663c4e9..000000000000 --- a/htmlparser/public/nsIDTD.h +++ /dev/null @@ -1,252 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. 
If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef nsIDTD_h___ -#define nsIDTD_h___ - -/** - * MODULE NOTES: - * @update gess 7/20/98 - * - * This interface defines standard interface for DTD's. Note that this - * isn't HTML specific. DTD's have several functions within the parser - * system: - * 1) To coordinate the consumption of an input stream via the - * parser - * 2) To serve as proxy to represent the containment rules of the - * underlying document - * 3) To offer autodetection services to the parser (mainly for doc - * conversion) - * */ - -#include "nsISupports.h" -#include "nsString.h" -#include "prtypes.h" -#include "nsITokenizer.h" - -#define NS_IDTD_IID \ - { 0xa6cf9053, 0x15b3, 0x11d2,{0x93, 0x2e, 0x00, 0x80, 0x5f, 0x8a, 0xdd, 0x32}} - -enum eAutoDetectResult { - eUnknownDetect, - eValidDetect, - ePrimaryDetect, - eInvalidDetect -}; - -enum nsDTDMode { - eDTDMode_unknown = 0, - eDTDMode_quirks, //pre 4.0 versions - eDTDMode_almost_standards, - eDTDMode_full_standards, - eDTDMode_autodetect, - eDTDMode_fragment -}; - - -class nsIParser; -class CToken; -class nsIURI; -class nsIContentSink; -class CParserContext; -class nsIAtom; - -class nsIDTD : public nsISupports -{ -public: - - NS_DEFINE_STATIC_IID_ACCESSOR(NS_IDTD_IID) - - - NS_IMETHOD_(const nsIID&) GetMostDerivedIID(void) const = 0; - - /** - * Call this method if you want the DTD to construct a clone of itself. - * @update gess7/23/98 - * @param - * @return - */ - NS_IMETHOD CreateNewInstance(nsIDTD** aInstancePtrResult) = 0; - - /** - * This method is called to determine if the given DTD can parse - * a document in a given source-type. - * NOTE: Parsing always assumes that the end result will involve - * storing the result in the main content model. 
- * @update gess6/24/98 - * @param aContentType -- string representing type of doc to be - * converted (ie text/html) - * @return TRUE if this DTD can satisfy the request; FALSE otherwise. - */ - NS_IMETHOD_(eAutoDetectResult) CanParse(CParserContext& aParserContext, - const nsString& aBuffer, - PRInt32 aVersion) = 0; - - NS_IMETHOD WillBuildModel(const CParserContext& aParserContext, - nsITokenizer* aTokenizer, - nsIContentSink* aSink) = 0; - - /** - * Called by the parser after the parsing process has concluded - * @update gess5/18/98 - * @param anErrorCode - contains error code resulting from parse process - * @return - */ - NS_IMETHOD DidBuildModel(nsresult anErrorCode, PRBool aNotifySink, - nsIParser* aParser, - nsIContentSink* aSink) = 0; - - /** - * Called by the parser after the parsing process has concluded - * @update gess5/18/98 - * @param anErrorCode - contains error code resulting from parse process - * @return - */ - NS_IMETHOD BuildModel(nsIParser* aParser, nsITokenizer* aTokenizer, - nsITokenObserver* anObserver, - nsIContentSink* aSink) = 0; - - /** - * Called during model building phase of parse process. Each token - * created during the parse phase is stored in a deque (in the - * parser) and are passed to this method so that the DTD can - * process the token. Ultimately, the DTD will transform given - * token into calls onto a contentsink. - * @update gess 3/25/98 - * @param aToken -- token object to be put into content model - * @return error code (usually 0) - */ - NS_IMETHOD HandleToken(CToken* aToken,nsIParser* aParser) = 0; - - /** - * If the parse process gets interrupted midway, this method is - * called by the parser prior to resuming the process. - * @update gess5/18/98 - * @return ignored - */ - NS_IMETHOD WillResumeParse(nsIContentSink* aSink) = 0; - - /** - * If the parse process gets interrupted, this method is called by - * the parser to notify the DTD that interruption will occur. 
- * @update gess5/18/98 - * @return ignored - */ - NS_IMETHOD WillInterruptParse(nsIContentSink* aSink) = 0; - - /** - * This method is called to determine whether or not a tag of one - * type can contain a tag of another type. - * - * @update gess 3/25/98 - * @param aParent -- int tag of parent container - * @param aChild -- int tag of child container - * @return PR_TRUE if parent can contain child - */ - NS_IMETHOD_(PRBool) CanContain(PRInt32 aParent,PRInt32 aChild) const = 0; - - /** - * This method gets called to determine whether a given - * tag is itself a container - * - * @update gess 3/25/98 - * @param aTag -- tag to test for containership - * @return PR_TRUE if given tag can contain other tags - */ - NS_IMETHOD_(PRBool) IsContainer(PRInt32 aTag) const = 0; - - /** - * Use this id you want to stop the building content model - * --------------[ Sets DTD to STOP mode ]---------------- - * It's recommended to use this method in accordance with - * the parser's terminate() method. - * - * @update harishd 07/22/99 - * @param - * @return - */ - NS_IMETHOD_(void) Terminate() = 0; - - NS_IMETHOD_(PRInt32) GetType() = 0; - - NS_IMETHOD CollectSkippedContent(PRInt32 aTag, nsAString& aContent, PRInt32 &aLineNo) = 0; - -/* XXX Temporary measure, pending further work by RickG */ - - - // Whaaaa! These are useless methods, use nsIParserService! - - - /** - * Give rest of world access to our tag enums, so that CanContain(), etc, - * become useful. 
- */ - NS_IMETHOD StringTagToIntTag(const nsAString &aTag, - PRInt32* aIntTag) const = 0; - - NS_IMETHOD_(const PRUnichar *) IntTagToStringTag(PRInt32 aIntTag) const = 0; - - NS_IMETHOD_(nsIAtom *) IntTagToAtom(PRInt32 aIntTag) const = 0; - - NS_IMETHOD_(PRBool) IsBlockElement(PRInt32 aTagID, - PRInt32 aParentID) const = 0; - - NS_IMETHOD_(PRBool) IsInlineElement(PRInt32 aTagID, - PRInt32 aParentID) const = 0; -}; - -#define NS_DECL_NSIDTD \ - NS_IMETHOD_(const nsIID&) GetMostDerivedIID(void) const;\ - NS_IMETHOD CreateNewInstance(nsIDTD** aInstancePtrResult);\ - NS_IMETHOD_(eAutoDetectResult) CanParse(CParserContext& aParserContext, const nsString& aBuffer, PRInt32 aVersion);\ - NS_IMETHOD WillBuildModel( const CParserContext& aParserContext, nsITokenizer* aTokenizer, nsIContentSink* aSink);\ - NS_IMETHOD DidBuildModel(nsresult anErrorCode,PRBool aNotifySink,nsIParser* aParser,nsIContentSink* aSink);\ - NS_IMETHOD BuildModel(nsIParser* aParser,nsITokenizer* aTokenizer,nsITokenObserver* anObserver,nsIContentSink* aSink);\ - NS_IMETHOD HandleToken(CToken* aToken,nsIParser* aParser);\ - NS_IMETHOD WillResumeParse(nsIContentSink* aSink = 0);\ - NS_IMETHOD WillInterruptParse(nsIContentSink* aSink = 0);\ - NS_IMETHOD_(PRBool) CanContain(PRInt32 aParent,PRInt32 aChild) const;\ - NS_IMETHOD_(PRBool) IsContainer(PRInt32 aTag) const;\ - NS_IMETHOD CollectSkippedContent(PRInt32 aTag, nsAString& aContent, PRInt32 &aLineNo);\ - NS_IMETHOD_(void) Terminate();\ - NS_IMETHOD_(PRInt32) GetType(); \ - NS_IMETHOD StringTagToIntTag(const nsAString &aTag, PRInt32* aIntTag) const ;\ - NS_IMETHOD_(const PRUnichar *) IntTagToStringTag(PRInt32 aIntTag) const ;\ - NS_IMETHOD_(nsIAtom *) IntTagToAtom(PRInt32 aIntTag) const;\ - NS_IMETHOD_(PRBool) IsBlockElement(PRInt32 aTagID,PRInt32 aParentID) const;\ - NS_IMETHOD_(PRBool) IsInlineElement(PRInt32 aTagID,PRInt32 aParentID) const; -#endif /* nsIDTD_h___ */ diff --git a/htmlparser/public/nsIElementObserver.h 
b/htmlparser/public/nsIElementObserver.h deleted file mode 100644 index ce995842128a..000000000000 --- a/htmlparser/public/nsIElementObserver.h +++ /dev/null @@ -1,90 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1999 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - - -/** - * MODULE NOTES: - * @update rickg 03.23.2000 //removed unused NS_PARSER_SUBJECT and predecl of nsString - * - */ - -#ifndef nsIElementObserver_h__ -#define nsIElementObserver_h__ - -#include "nsISupports.h" -#include "prtypes.h" -#include "nsHTMLTags.h" -#include "nsVoidArray.h" - - -// {4672AA04-F6AE-11d2-B3B7-00805F8A6670} -#define NS_IELEMENTOBSERVER_IID \ -{ 0x4672aa04, 0xf6ae, 0x11d2, { 0xb3, 0xb7, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } } - - -class nsIElementObserver : public nsISupports { -public: - NS_DEFINE_STATIC_IID_ACCESSOR(NS_IELEMENTOBSERVER_IID) - - enum { IS_DOCUMENT_WRITE = 1U }; - /* - * Subject call observer when the parser hit the tag - * @param aDocumentID- ID of the document - * @param aTag- the tag - * @param numOfAttributes - number of attributes - * @param nameArray - array of name. - * @param valueArray - array of value - */ - NS_IMETHOD Notify(PRUint32 aDocumentID, eHTMLTags aTag, - PRUint32 numOfAttributes, const PRUnichar* nameArray[], - const PRUnichar* valueArray[]) = 0; - - NS_IMETHOD Notify(PRUint32 aDocumentID, const PRUnichar* aTag, - PRUint32 numOfAttributes, const PRUnichar* nameArray[], - const PRUnichar* valueArray[]) = 0; - - NS_IMETHOD Notify(nsISupports* aWebShell, - nsISupports* aChannel, - const PRUnichar* aTag, - const nsStringArray* aKeys, - const nsStringArray* aValues, - const PRUint32 aFlags) = 0; - -}; - -#endif /* nsIElementObserver_h__ */ - diff --git a/htmlparser/public/nsIExpatSink.idl b/htmlparser/public/nsIExpatSink.idl deleted file mode 100644 index 146d42e34956..000000000000 --- a/htmlparser/public/nsIExpatSink.idl +++ /dev/null @@ -1,131 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * 
the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "nsISupports.idl" - -/** - * This interface should be implemented by any content sink that wants - * to get output from expat and do something with it; in other words, - * by any sink that handles some sort of XML dialect. - */ - -[scriptable, uuid(1DEEA160-C661-11d5-84CC-0010A4E0C706)] -interface nsIExpatSink : nsISupports -{ - /** - * Called to handle the opening tag of an element. 
- * @param aName the fully qualified tagname of the element - * @param aAtts the array of attribute names and values. There are - * aAttsCount/2 names and aAttsCount/2 values, so the total number of - * elements in the array is aAttsCount. The names and values - * alternate. Thus, if we number attributes starting with 0, - * aAtts[2*k] is the name of the k-th attribute and aAtts[2*k+1] is - * the value of that attribute Both explicitly specified attributes - * and attributes that are defined to have default values in a DTD are - * present in aAtts. - * @param aAttsCount the number of elements in aAtts. - * @param aIndex If the element has an attribute of type ID, then - * aAtts[aIndex] is the name of that attribute. Otherwise, aIndex - * is -1 - * @param aLineNumber the line number of the start tag in the data stream. - */ - void HandleStartElement(in wstring aName, - [array, size_is(aAttsCount)] in wstring aAtts, - in unsigned long aAttsCount, - in long aIndex, - in unsigned long aLineNumber); - - /** - * Called to handle the closing tag of an element. - * @param aName the fully qualified tagname of the element - */ - void HandleEndElement(in wstring aName); - - /** - * Called to handle a comment - * @param aCommentText the text of the comment (not including the - * "") - */ - void HandleComment(in wstring aCommentText); - - /** - * Called to handle a CDATA section - * @param aData the text in the CDATA section. This is null-terminated. - * @param aLength the length of the aData string - */ - void HandleCDataSection([size_is(aLength)] in wstring aData, - in unsigned long aLength); - - /** - * Called to handle the doctype declaration - */ - void HandleDoctypeDecl(in AString aSubset, - in AString aName, - in AString aSystemId, - in AString aPublicId, - in nsISupports aCatalogData); - - /** - * Called to handle character data. Note that this does NOT get - * called for the contents of CDATA sections. - * @param aData the data to handle. aData is NOT NULL-TERMINATED. 
- * @param aLength the length of the aData string - */ - void HandleCharacterData([size_is(aLength)] in wstring aData, - in unsigned long aLength); - - /** - * Called to handle a processing instruction - * @param aTarget the PI target (e.g. xml-stylesheet) - * @param aData all the rest of the data in the PI - */ - void HandleProcessingInstruction(in wstring aTarget, - in wstring aData); - - /** - * Handle the XML Declaration. - * - * @param aData The string. - * @param aLength The length of the declaration from - * opening '<' to closing '>'. - **/ - void HandleXMLDeclaration([size_is(aLength)] in wstring aData, - in unsigned long aLength); - - void ReportError(in wstring aErrorText, - in wstring aSourceText); -}; diff --git a/htmlparser/public/nsIHTMLContentSink.h b/htmlparser/public/nsIHTMLContentSink.h deleted file mode 100644 index 42fbed761376..000000000000 --- a/htmlparser/public/nsIHTMLContentSink.h +++ /dev/null @@ -1,329 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -#ifndef nsIHTMLContentSink_h___ -#define nsIHTMLContentSink_h___ - -/** - * MODULE NOTES: - * @update gess 4/1/98 - * - * This file declares the concrete HTMLContentSink class. - * This class is used during the parsing process as the - * primary interface between the parser and the content - * model. - * - * After the tokenizer completes, the parser iterates over - * the known token list. As the parser identifies valid - * elements, it calls the contentsink interface to notify - * the content model that a new node or child node is being - * created and added to the content model. - * - * The HTMLContentSink interface assumes 4 underlying - * containers: HTML, HEAD, BODY and FRAMESET. Before - * accessing any these, the parser will call the appropriate - * OpennsIHTMLContentSink method: OpenHTML,OpenHead,OpenBody,OpenFrameSet; - * likewise, the ClosensIHTMLContentSink version will be called when the - * parser is done with a given section. - * - * IMPORTANT: The parser may Open each container more than - * once! This is due to the irregular nature of HTML files. 
- * For example, it is possible to encounter plain text at - * the start of an HTML document (that preceeds the HTML tag). - * Such text is treated as if it were part of the body. - * In such cases, the parser will Open the body, pass the text- - * node in and then Close the body. The body will likely be - * re-Opened later when the actual tag has been seen. - * - * Containers within the body are Opened and Closed - * using the OpenContainer(...) and CloseContainer(...) calls. - * It is assumed that the document or contentSink is - * maintaining its state to manage where new content should - * be added to the underlying document. - * - * NOTE: OpenHTML() and OpenBody() may get called multiple times - * in the same document. That's fine, and it doesn't mean - * that we have multiple bodies or HTML's. - * - * NOTE: I haven't figured out how sub-documents (non-frames) - * are going to be handled. Stay tuned. - */ -#include "nsIParserNode.h" -#include "nsIContentSink.h" -#include "nsHTMLTags.h" - -#define NS_IHTML_CONTENT_SINK_IID \ - { 0xa6cf9051, 0x15b3, 0x11d2,{0x93, 0x2e, 0x00, 0x80, 0x5f, 0x8a, 0xdd, 0x32}} - - -#ifdef XP_MAC -#define MAX_REFLOW_DEPTH 75 //setting to 75 to prevent layout from crashing on mac. Bug 55095. -#else -#define MAX_REFLOW_DEPTH 200 //windows and linux (etc) can do much deeper structures. -#endif - -class nsIHTMLContentSink : public nsIContentSink -{ -public: - - NS_DEFINE_STATIC_IID_ACCESSOR(NS_IHTML_CONTENT_SINK_IID) - - /** - * This method gets called by the parser when it encounters - * a title tag and wants to set the document title in the sink. - * - * @update 4/1/98 gess - * @param nsString reference to new title value - */ - NS_IMETHOD SetTitle(const nsString& aValue) = 0; - - /** - * This method is used to open the outer HTML container. 
- * - * @update 4/1/98 gess - * @param nsIParserNode reference to parser node interface - */ - NS_IMETHOD OpenHTML(const nsIParserNode& aNode) = 0; - - /** - * This method is used to close the outer HTML container. - * - */ - NS_IMETHOD CloseHTML() = 0; - - /** - * This method is used to open the only HEAD container. - * - * @update 4/1/98 gess - * @param nsIParserNode reference to parser node interface - */ - NS_IMETHOD OpenHead(const nsIParserNode& aNode) = 0; - - /** - * This method is used to close the only HEAD container. - */ - NS_IMETHOD CloseHead() = 0; - - /** - * This method is used to open the main BODY container. - * - * @update 4/1/98 gess - * @param nsIParserNode reference to parser node interface - */ - NS_IMETHOD OpenBody(const nsIParserNode& aNode) = 0; - - /** - * This method is used to close the main BODY container. - * - */ - NS_IMETHOD CloseBody() = 0; - - /** - * This method is used to open a new FORM container. - * - * @update 4/1/98 gess - * @param nsIParserNode reference to parser node interface - */ - NS_IMETHOD OpenForm(const nsIParserNode& aNode) = 0; - - /** - * This method is used to close the outer FORM container. - * - */ - NS_IMETHOD CloseForm() = 0; - - /** - * This method is used to open a new MAP container. - * - * @update 4/1/98 gess - * @param nsIParserNode reference to parser node interface - */ - NS_IMETHOD OpenMap(const nsIParserNode& aNode) = 0; - - /** - * This method is used to close the MAP container. - * - */ - NS_IMETHOD CloseMap() = 0; - - /** - * This method is used to open the FRAMESET container. - * - * @update 4/1/98 gess - * @param nsIParserNode reference to parser node interface - */ - NS_IMETHOD OpenFrameset(const nsIParserNode& aNode) = 0; - - /** - * This method is used to close the FRAMESET container. - * - */ - NS_IMETHOD CloseFrameset() = 0; - - /** - * This gets called when handling illegal contents, especially - * in dealing with tables. This method creates a new context. 
- * - * @update 04/04/99 harishd - * @param aPosition - The position from where the new context begins. - */ - NS_IMETHOD BeginContext(PRInt32 aPosition) = 0; - - /** - * This method terminates any new context that got created by - * BeginContext and switches back to the main context. - * - * @update 04/04/99 harishd - * @param aPosition - Validates the end of a context. - */ - NS_IMETHOD EndContext(PRInt32 aPosition) = 0; - - /** - * @update 01/09/2003 harishd - * @param aTag - Check if this tag is enabled or not. - */ - NS_IMETHOD IsEnabled(PRInt32 aTag, PRBool* aReturn) = 0; - - /** - * This method is called when parser is about to begin - * synchronously processing a chunk of tokens. - */ - NS_IMETHOD WillProcessTokens(void) = 0; - - /** - * This method is called when parser has - * completed processing a chunk of tokens. The processing of the - * tokens may be interrupted by returning NS_ERROR_HTMLPARSER_INTERRUPTED from - * DidProcessAToken. - */ - NS_IMETHOD DidProcessTokens() = 0; - - /** - * This method is called when parser is about to - * process a single token - */ - NS_IMETHOD WillProcessAToken(void) = 0; - - /** - * This method is called when parser has completed - * the processing for a single token. - * @return NS_OK if processing should not be interrupted - * NS_ERROR_HTMLPARSER_INTERRUPTED if the parsing should be interrupted - */ - NS_IMETHOD DidProcessAToken(void) = 0; - - /** - * This method is used to open a generic container in the sink. - * - * @update 4/1/98 gess - * @param nsIParserNode reference to parser node interface - */ - NS_IMETHOD OpenContainer(const nsIParserNode& aNode) = 0; - - /** - * This method gets called by the parser when a close - * container tag has been consumed and needs to be closed. - * - * @param aTag - The tag to be closed. 
- */ - NS_IMETHOD CloseContainer(const nsHTMLTag aTag) = 0; - - /** - * This gets called by the parser to contents to - * the head container - * - */ - NS_IMETHOD AddHeadContent(const nsIParserNode& aNode) = 0; - - /** - * This gets called by the parser when you want to add - * a leaf node to the current container in the content - * model. - * - * @update 4/1/98 gess - * @param nsIParserNode reference to parser node interface - */ - NS_IMETHOD AddLeaf(const nsIParserNode& aNode) = 0; - - /** - * This gets called by the parser when you want to add - * a leaf node to the current container in the content - * model. - * - * @update 4/1/98 gess - * @param nsIParserNode reference to parser node interface - */ - NS_IMETHOD AddComment(const nsIParserNode& aNode) = 0; - - /** - * This gets called by the parser when you want to add - * a leaf node to the current container in the content - * model. - * - * @update 4/1/98 gess - * @param nsIParserNode reference to parser node interface - */ - NS_IMETHOD AddProcessingInstruction(const nsIParserNode& aNode) = 0; - - /** - * This method is called by the parser when it encounters - * a document type declaration. - * - * XXX Should the parser also part the internal subset? 
- * - * @param nsIParserNode reference to parser node interface - */ - NS_IMETHOD AddDocTypeDecl(const nsIParserNode& aNode) = 0; - - /** - * This gets called by the parser to notify observers of - * the tag - * - * @param aErrorResult the error code - */ - NS_IMETHOD NotifyTagObservers(nsIParserNode* aNode) = 0; - - /** - * Call this method to determnine if a FORM is on the sink's stack - * - * @return PR_TRUE if found else PR_FALSE - */ - NS_IMETHOD_(PRBool) IsFormOnStack() = 0; - -}; - -#endif /* nsIHTMLContentSink_h___ */ - diff --git a/htmlparser/public/nsIHTMLFragmentContentSink.h b/htmlparser/public/nsIHTMLFragmentContentSink.h deleted file mode 100644 index b91219cad6bc..000000000000 --- a/htmlparser/public/nsIHTMLFragmentContentSink.h +++ /dev/null @@ -1,75 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -#ifndef nsIHTMLFragmentContentSink_h___ -#define nsIHTMLFragmentContentSink_h___ - -#include "nsIHTMLContentSink.h" - -#define NS_HTMLFRAGMENTSINK_CONTRACTID "@mozilla.org/layout/htmlfragmentsink;1" -#define NS_HTMLFRAGMENTSINK2_CONTRACTID "@mozilla.org/layout/htmlfragmentsink;2" - -class nsIDOMDocumentFragment; -class nsIDocument; - -#define NS_IHTML_FRAGMENT_CONTENT_SINK_IID \ - {0xa6cf9102, 0x15b3, 0x11d2, \ - {0x93, 0x2e, 0x00, 0x80, 0x5f, 0x8a, 0xdd, 0x32}} - -class nsIHTMLFragmentContentSink : public nsIHTMLContentSink { -public: - /** - * This method is used to obtain the fragment created by - * a fragment content sink. The value returned will be null - * if the content sink hasn't yet received parser notifications. - * - */ - NS_IMETHOD GetFragment(nsIDOMDocumentFragment** aFragment) = 0; - - /** - * This method is used to set the target document for this fragment - * sink. This document's nodeinfo manager will be used to create - * the content objects. This MUST be called before the sink is used. 
- * - * If aDocument is null or has no nodeinfo manager, the sink will - * create a brand-new nodeinfo manager. - * - * @param aDocument the document the new nodes will belong to - */ - NS_IMETHOD SetTargetDocument(nsIDocument* aDocument) = 0; -}; - -#endif diff --git a/htmlparser/public/nsILoggingSink.h b/htmlparser/public/nsILoggingSink.h deleted file mode 100644 index ba39191a333f..000000000000 --- a/htmlparser/public/nsILoggingSink.h +++ /dev/null @@ -1,55 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -#ifndef nsILoggingSink_h___ -#define nsILoggingSink_h___ - -#include "nsIHTMLContentSink.h" -#include "nsString.h" -#include "prprf.h" - -// IID for nsILoggingSink -#define NS_ILOGGING_SINK_IID \ - {0xa6cf9061, 0x15b3, 0x11d2,{0x93, 0x2e, 0x00, 0x80, 0x5f, 0x8a, 0xdd, 0x32}} - -class nsILoggingSink : public nsIHTMLContentSink { -public: - NS_IMETHOD SetOutputStream(PRFileDesc *aStream,PRBool autoDelete=PR_FALSE) =0; -}; - -extern "C" nsresult NS_NewHTMLLoggingSink(nsIContentSink** aInstancePtrResult); - -#endif /* nsILoggingSink_h___ */ diff --git a/htmlparser/public/nsIParser.h b/htmlparser/public/nsIParser.h deleted file mode 100644 index 28790aa2ce7e..000000000000 --- a/htmlparser/public/nsIParser.h +++ /dev/null @@ -1,387 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. 
- * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -#ifndef NS_IPARSER___ -#define NS_IPARSER___ - - -/** - * MODULE NOTES: - * @update gess 4/1/98 - * - * This class defines the iparser interface. This XPCOM - * inteface is all that parser clients ever need to see. 
- * - **/ - -#include "nsISupports.h" -#include "nsIStreamListener.h" -#include "nsIDTD.h" -#include "nsIInputStream.h" -#include "nsHashtable.h" -#include "nsVoidArray.h" - -#define NS_IPARSER_IID \ - {0x355cbba0, 0xbf7d, 0x11d1, \ - {0xaa, 0xd9, 0x00, 0x80, 0x5f, 0x8a, 0x3e, 0x14}} - -// {41421C60-310A-11d4-816F-000064657374} -#define NS_IDEBUG_DUMP_CONTENT_IID \ -{ 0x41421c60, 0x310a, 0x11d4, { 0x81, 0x6f, 0x0, 0x0, 0x64, 0x65, 0x73, 0x74 } }; - -class nsIContentSink; -class nsIRequestObserver; -class nsIParserFilter; -class nsString; -class nsIURI; -class nsIChannel; - -enum eParserCommands { - eViewNormal, - eViewSource, - eViewFragment, - eViewErrors -}; - -enum eCRCQuality { - eCRCGood = 0, - eCRCFair, - eCRCPoor -}; - - -enum eParserDocType { - ePlainText = 0, - eXML, - eHTML_Quirks, - eHTML3_Quirks, // separate, for editor output, since HTML pre-4.0 lacks tbody - eHTML_Strict -}; - - -// define Charset source constants -// note: the value order define the priority -#define kCharsetUninitialized 0 -#define kCharsetFromWeakDocTypeDefault 1 -#define kCharsetFromUserDefault 2 -#define kCharsetFromDocTypeDefault 3 -#define kCharsetFromCache 4 -#define kCharsetFromParentFrame 5 -#define kCharsetFromBookmarks 6 -#define kCharsetFromAutoDetection 7 -#define kCharsetFromHintPrevDoc 8 -#define kCharsetFromMetaTag 9 -#define kCharsetFromByteOrderMark 10 -#define kCharsetFromChannel 11 -#define kCharsetFromParentForced 12 -#define kCharsetFromUserForced 13 -#define kCharsetFromOtherComponent 14 -#define kCharsetFromPreviousLoading 15 - -enum eStreamState {eNone,eOnStart,eOnDataAvail,eOnStop}; - -/** - * FOR DEBUG PURPOSE ONLY - * - * Use this interface to query objects that contain content information. - * Ex. Parser can trigger dump content by querying the sink that has - * access to the content. 
- * - * @update harishd 05/25/00 - */ -class nsIDebugDumpContent : public nsISupports { -public: - NS_DEFINE_STATIC_IID_ACCESSOR(NS_IDEBUG_DUMP_CONTENT_IID) - NS_IMETHOD DumpContentModel()=0; -}; - -/** - * This class defines the iparser interface. This XPCOM - * inteface is all that parser clients ever need to see. - * - * @update gess 3/25/98 - */ -class nsIParser : public nsISupports { - public: - - NS_DEFINE_STATIC_IID_ACCESSOR(NS_IPARSER_IID) - - /** - * Call this method if you have a DTD that you want to share with the parser. - * Registered DTD's get remembered until the system shuts down. - * - * @update gess 3/25/98 - * @param aDTD -- ptr DTD that you're publishing the services of - */ - NS_IMETHOD RegisterDTD(nsIDTD* aDTD)=0; - - - /** - * Select given content sink into parser for parser output - * @update gess5/11/98 - * @param aSink is the new sink to be used by parser - * @return - */ - NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink)=0; - - - /** - * retrieve the sink set into the parser - * @update gess5/11/98 - * @return current sink - */ - NS_IMETHOD_(nsIContentSink*) GetContentSink(void)=0; - - /** - * Call this method once you've created a parser, and want to instruct it - * about the command which caused the parser to be constructed. For example, - * this allows us to select a DTD which can do, say, view-source. 
- * - * @update gess 3/25/98 - * @param aCommand -- ptrs to string that contains command - * @return nada - */ - NS_IMETHOD_(void) GetCommand(nsString& aCommand)=0; - NS_IMETHOD_(void) SetCommand(const char* aCommand)=0; - NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand)=0; - - /** - * Call this method once you've created a parser, and want to instruct it - * about what charset to load - * - * @update ftang 4/23/99 - * @param aCharset- the charest of a document - * @param aCharsetSource- the soure of the chares - * @return nada - */ - NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, PRInt32 aSource)=0; - NS_IMETHOD_(void) GetDocumentCharset(nsACString& oCharset, PRInt32& oSource)=0; - - NS_IMETHOD_(void) SetParserFilter(nsIParserFilter* aFilter) = 0; - - /** - * Get the channel associated with this parser - * @update harishd,gagan 07/17/01 - * @param aChannel out param that will contain the result - * @return NS_OK if successful - */ - NS_IMETHOD GetChannel(nsIChannel** aChannel) = 0; - - /** - * Get the DTD associated with this parser - * @update vidur 9/29/99 - * @param aDTD out param that will contain the result - * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error - */ - NS_IMETHOD GetDTD(nsIDTD** aDTD) = 0; - - /****************************************************************************************** - * Parse methods always begin with an input source, and perform conversions - * until you wind up being emitted to the given contentsink (which may or may not - * be a proxy for the NGLayout content model). - ******************************************************************************************/ - - // Call this method to resume the parser from the blocked state.. - NS_IMETHOD ContinueParsing() = 0; - - // Stops parsing temporarily. - NS_IMETHOD_(void) BlockParser() = 0; - - // Open up the parser for tokenization, building up content - // model..etc. However, this method does not resume parsing - // automatically. 
It's the callers' responsibility to restart - // the parsing engine. - NS_IMETHOD_(void) UnblockParser() = 0; - - NS_IMETHOD_(PRBool) IsParserEnabled() = 0; - NS_IMETHOD_(PRBool) IsComplete() = 0; - - NS_IMETHOD Parse(nsIURI* aURL, - nsIRequestObserver* aListener = nsnull, - PRBool aEnableVerify = PR_FALSE, - void* aKey = 0, - nsDTDMode aMode = eDTDMode_autodetect) = 0; - NS_IMETHOD Parse(nsIInputStream* aStream, - const nsACString& aMimeType, - PRBool aEnableVerify = PR_FALSE, - void* aKey = 0, - nsDTDMode aMode = eDTDMode_autodetect) = 0; - NS_IMETHOD Parse(const nsAString& aSourceBuffer, - void* aKey, - const nsACString& aMimeType, - PRBool aEnableVerify, - PRBool aLastCall, - nsDTDMode aMode = eDTDMode_autodetect) = 0; - - NS_IMETHOD Terminate(void) = 0; - - NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer, - void* aKey, - nsVoidArray& aTagStack, - PRUint32 anInsertPos, - const nsACString& aContentType, - nsDTDMode aMode = eDTDMode_autodetect) = 0; - - /** - * This method gets called when the tokens have been consumed, and it's time - * to build the model via the content sink. - * @update gess5/11/98 - * @return error code -- 0 if model building went well . - */ - NS_IMETHOD BuildModel(void) = 0; - - - /** - * Retrieve the parse mode from the parser... - * - * @update gess 6/9/98 - * @return ptr to scanner - */ - NS_IMETHOD_(nsDTDMode) GetParseMode(void) = 0; - - /** - * Call this method to cancel any pending parsing events. - * Parsing events may be pending if all of the document's content - * has been passed to the parser but the parser has been interrupted - * because processing the tokens took too long. - * - * @update kmcclusk 05/18/01 - * @return NS_OK if succeeded else ERROR. - */ - - NS_IMETHOD CancelParsingEvents() = 0; -}; - -/* ===========================================================* - Some useful constants... 
- * ===========================================================*/ - -#include "prtypes.h" -#include "nsError.h" - -#define NS_ERROR_HTMLPARSER_EOF NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1000) -#define NS_ERROR_HTMLPARSER_UNKNOWN NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1001) -#define NS_ERROR_HTMLPARSER_CANTPROPAGATE NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1002) -#define NS_ERROR_HTMLPARSER_CONTEXTMISMATCH NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1003) -#define NS_ERROR_HTMLPARSER_BADFILENAME NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1004) -#define NS_ERROR_HTMLPARSER_BADURL NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1005) -#define NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1006) -#define NS_ERROR_HTMLPARSER_INTERRUPTED NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1007) -#define NS_ERROR_HTMLPARSER_BLOCK NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1008) -#define NS_ERROR_HTMLPARSER_BADTOKENIZER NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1009) -#define NS_ERROR_HTMLPARSER_BADATTRIBUTE NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1010) -#define NS_ERROR_HTMLPARSER_UNRESOLVEDDTD NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1011) -#define NS_ERROR_HTMLPARSER_MISPLACEDTABLECONTENT NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1012) -#define NS_ERROR_HTMLPARSER_BADDTD NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1013) -#define NS_ERROR_HTMLPARSER_BADCONTEXT NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1014) -#define NS_ERROR_HTMLPARSER_STOPPARSING NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1015) -#define NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1016) -#define NS_ERROR_HTMLPARSER_HIERARCHYTOODEEP NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1017) - - -#define NS_ERROR_HTMLPARSER_CONTINUE NS_OK - - -const 
PRUint32 kEOF = NS_ERROR_HTMLPARSER_EOF; -const PRUint32 kUnknownError = NS_ERROR_HTMLPARSER_UNKNOWN; -const PRUint32 kCantPropagate = NS_ERROR_HTMLPARSER_CANTPROPAGATE; -const PRUint32 kContextMismatch = NS_ERROR_HTMLPARSER_CONTEXTMISMATCH; -const PRUint32 kBadFilename = NS_ERROR_HTMLPARSER_BADFILENAME; -const PRUint32 kBadURL = NS_ERROR_HTMLPARSER_BADURL; -const PRUint32 kInvalidParserContext = NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT; -const PRUint32 kBlocked = NS_ERROR_HTMLPARSER_BLOCK; -const PRUint32 kBadStringLiteral = NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL; -const PRUint32 kHierarchyTooDeep = NS_ERROR_HTMLPARSER_HIERARCHYTOODEEP; - -const PRUnichar kNewLine = '\n'; -const PRUnichar kCR = '\r'; -const PRUnichar kLF = '\n'; -const PRUnichar kTab = '\t'; -const PRUnichar kSpace = ' '; -const PRUnichar kQuote = '"'; -const PRUnichar kApostrophe = '\''; -const PRUnichar kLessThan = '<'; -const PRUnichar kGreaterThan = '>'; -const PRUnichar kAmpersand = '&'; -const PRUnichar kForwardSlash = '/'; -const PRUnichar kBackSlash = '\\'; -const PRUnichar kEqual = '='; -const PRUnichar kMinus = '-'; -const PRUnichar kPlus = '+'; -const PRUnichar kExclamation = '!'; -const PRUnichar kSemicolon = ';'; -const PRUnichar kHashsign = '#'; -const PRUnichar kAsterisk = '*'; -const PRUnichar kUnderbar = '_'; -const PRUnichar kComma = ','; -const PRUnichar kLeftParen = '('; -const PRUnichar kRightParen = ')'; -const PRUnichar kLeftBrace = '{'; -const PRUnichar kRightBrace = '}'; -const PRUnichar kQuestionMark = '?'; -const PRUnichar kLeftSquareBracket = '['; -const PRUnichar kRightSquareBracket = ']'; -const PRUnichar kNullCh = '\0'; - -#define kHTMLTextContentType "text/html" -#define kXMLTextContentType "text/xml" -#define kXMLApplicationContentType "application/xml" -#define kXHTMLApplicationContentType "application/xhtml+xml" -#define kXULTextContentType "application/vnd.mozilla.xul+xml" -#define kRDFTextContentType "text/rdf" -#define kXIFTextContentType "text/xif" 
-#define kPlainTextContentType "text/plain" -#define kViewSourceCommand "view-source" -#define kViewFragmentCommand "view-fragment" -#define kTextCSSContentType "text/css" -#define kApplicationJSContentType "application/x-javascript" -#define kTextJSContentType "text/javascript" -#define kSGMLTextContentType "text/sgml" -#define kSVGTextContentType "image/svg+xml" - -#define NS_IPARSER_FLAG_UNKNOWN_MODE 0x00000000 -#define NS_IPARSER_FLAG_QUIRKS_MODE 0x00000002 -#define NS_IPARSER_FLAG_STRICT_MODE 0x00000004 -#define NS_IPARSER_FLAG_AUTO_DETECT_MODE 0x00000010 -#define NS_IPARSER_FLAG_VIEW_NORMAL 0x00000020 -#define NS_IPARSER_FLAG_VIEW_SOURCE 0x00000040 -#define NS_IPARSER_FLAG_VIEW_ERRORS 0x00000080 -#define NS_IPARSER_FLAG_PRESERVE_CONTENT 0x00000100 -#define NS_IPARSER_FLAG_PLAIN_TEXT 0x00000200 -#define NS_IPARSER_FLAG_XML 0x00000400 -#define NS_IPARSER_FLAG_HTML 0x00000800 - -#endif diff --git a/htmlparser/public/nsIParserFilter.h b/htmlparser/public/nsIParserFilter.h deleted file mode 100644 index 030b32064cd8..000000000000 --- a/htmlparser/public/nsIParserFilter.h +++ /dev/null @@ -1,78 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. 
- * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -/** - * MODULE NOTES: - * @update jevering 6/17/98 - * - * This interface is not yet used; it was intended to allow an observer object - * to "look at" the i/o stream coming into the parser before, during and after - * the parser saw it. The intention of this was to allow an observer to modify - * the stream at various stages. 
- */ - -#ifndef IPARSERFILTER -#define IPARSERFILTER - -#include "nsISupports.h" - -class CToken; - -#define NS_IPARSERFILTER_IID \ - {0x14d6ff0, 0x0610, 0x11d2, \ - {0x8c, 0x3f, 0x00, 0x80, 0x5f, 0x8a, 0x1d, 0xb7}} - - -class nsIParserFilter : public nsISupports { - public: - - NS_DEFINE_STATIC_IID_ACCESSOR(NS_IPARSERFILTER_IID) - - NS_IMETHOD RawBuffer(const char * buffer, PRUint32 * buffer_length) = 0; - - NS_IMETHOD WillAddToken(CToken & token) = 0; - - NS_IMETHOD ProcessTokens( /* dont know what goes here yet */ void ) = 0; - - NS_IMETHOD Finish() = 0; - -}; - - - -#endif - diff --git a/htmlparser/public/nsIParserNode.h b/htmlparser/public/nsIParserNode.h deleted file mode 100644 index cb17e96f145f..000000000000 --- a/htmlparser/public/nsIParserNode.h +++ /dev/null @@ -1,179 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - - -/** - * MODULE NOTES: - * @update gess 4/1/98 - * - * This class is defines the basic interface between the - * parser and the content sink. The parser will iterate - * over the collection of tokens that it sees from the - * tokenizer, coverting each related "group" into one of - * these. This object gets passed to the sink, and is - * then immediately reused. - * - * If you want to hang onto one of these, you should - * make your own copy. - * - */ - -#ifndef NS_IPARSERNODE__ -#define NS_IPARSERNODE__ - -#include "nsISupports.h" -#include "prtypes.h" -#include "nsString.h" -#include "nsDebug.h" - -//#define HEAP_ALLOCATED_NODES -//#define DEBUG_TRACK_NODES - -class nsIAtom; -class CToken; - -// 6e59f160-2717-11d2-9246-00805f8a7ab6 -#define NS_IPARSER_NODE_IID \ - {0x6e59f160, 0x2717, 0x11d1, \ - {0x92, 0x46, 0x00, 0x80, 0x5f, 0x8a, 0x7a, 0xb6}} - -/** - * Parser nodes are the unit of exchange between the - * parser and the content sink. Nodes offer access to - * the current token, its attributes, and its skipped- - * content if applicable. 
- * - * @update gess 3/25/98 - */ -class nsIParserNode { // XXX Should be nsAParserNode - - public: - - - /** - * Retrieve the name of the node - * @update gess5/11/98 - * @return string containing node name - */ - virtual const nsAString& GetTagName() const = 0; //to get name of tag - - /** - * Retrieve the text from the given node - * @update gess5/11/98 - * @return string containing node text - */ - virtual const nsAString& GetText() const = 0; //get plain text if available - - /** - * Retrieve the type of the parser node. - * @update gess5/11/98 - * @return node type. - */ - virtual PRInt32 GetNodeType() const =0; - - /** - * Retrieve token type of parser node - * @update gess5/11/98 - * @return token type - */ - virtual PRInt32 GetTokenType() const =0; - - /** - * Retrieve the number of attributes in this node. - * @update gess5/11/98 - * @return count of attributes (may be 0) - */ - virtual PRInt32 GetAttributeCount(PRBool askToken=PR_FALSE) const =0; - - /** - * Retrieve the key (of key/value pair) at given index - * @update gess5/11/98 - * @param anIndex is the index of the key you want - * @return string containing key. - */ - virtual const nsAString& GetKeyAt(PRUint32 anIndex) const = 0; - - /** - * Retrieve the value (of key/value pair) at given index - * @update gess5/11/98 - * @param anIndex is the index of the value you want - * @return string containing value. - */ - virtual const nsAString& GetValueAt(PRUint32 anIndex) const = 0; - - /** - * NOTE: When the node is an entity, this will translate the entity - * to it's unicode value, and store it in aString. - * @update gess5/11/98 - * @param aString will contain the resulting unicode string value - * @return int (unicode char or unicode index from table) - */ - virtual PRInt32 TranslateToUnicodeStr(nsString& aString) const = 0; - - - virtual void AddAttribute(CToken* aToken)=0; - - /** - * This getter retrieves the line number from the input source where - * the token occured. 
Lines are interpreted as occuring between \n characters. - * @update gess7/24/98 - * @return int containing the line number the token was found on - */ - virtual PRInt32 GetSourceLineNumber(void) const =0; - - /** - * This pair of methods allows us to set a generic bit (for arbitrary use) - * on each node stored in the context. - * @update gess 11May2000 - */ - virtual PRBool GetGenericState(void) const =0; - virtual void SetGenericState(PRBool aState) =0; - - /** Retrieve a string containing the tag and its attributes in "source" form - * @update rickg 06June2000 - * @return void - */ - virtual void GetSource(nsString& aString)=0; - - /** Release all the objects you're holding - * @update harishd 08/02/00 - * @return void - */ - virtual nsresult ReleaseAll()=0; -}; - -#endif diff --git a/htmlparser/public/nsIParserService.h b/htmlparser/public/nsIParserService.h deleted file mode 100644 index c1884980d84a..000000000000 --- a/htmlparser/public/nsIParserService.h +++ /dev/null @@ -1,113 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef nsIParserService_h__ -#define nsIParserService_h__ - -#include "nsISupports.h" -#include "nsString.h" -#include "nsHTMLTags.h" -#include "nsIParserNode.h" -#include "nsIParser.h" -#include "nsVoidArray.h" -#include "nsIElementObserver.h" - -#define NS_PARSER_CONTRACTID_PREFIX "@mozilla.org/parser" - -// {90a92e37-abd6-441b-9b39-4064d98e1ede} -#define NS_IPARSERSERVICE_IID \ -{ 0x90a92e37, 0xabd6, 0x441b, { 0x9b, 0x39, 0x40, 0x64, 0xd9, 0x8e, 0x1e, 0xde } }; - -// {78081E70-AD53-11d5-8498-0010A4E0C706} -#define NS_IOBSERVERENTRY_IID \ -{ 0x78081e70, 0xad53, 0x11d5, { 0x84, 0x98, 0x00, 0x10, 0xa4, 0xe0, 0xc7, 0x06 } }; - - -class nsIObserverEntry : public nsISupports { - public: - NS_DEFINE_STATIC_IID_ACCESSOR(NS_IOBSERVERENTRY_IID) - - NS_IMETHOD Notify(nsIParserNode* aNode, - nsIParser* aParser, - nsISupports* aWebShell, - const PRUint32 aFlags) = 0; - -}; - - -class nsIParserService : public nsISupports { - public: - NS_DEFINE_STATIC_IID_ACCESSOR(NS_IPARSERSERVICE_IID) - - NS_IMETHOD HTMLAtomTagToId(nsIAtom* aAtom, PRInt32* aId) const = 0; - - NS_IMETHOD 
HTMLCaseSensitiveAtomTagToId(nsIAtom* aAtom, - PRInt32* aId) const = 0; - - NS_IMETHOD HTMLStringTagToId(const nsAString &aTagName, - PRInt32* aId) const = 0; - - NS_IMETHOD HTMLIdToStringTag(PRInt32 aId, - const PRUnichar **aTagName) const = 0; - - NS_IMETHOD HTMLConvertEntityToUnicode(const nsAString& aEntity, - PRInt32* aUnicode) const = 0; - - NS_IMETHOD HTMLConvertUnicodeToEntity(PRInt32 aUnicode, - nsCString& aEntity) const = 0; - - NS_IMETHOD IsContainer(PRInt32 aId, PRBool& aIsContainer) const = 0; - NS_IMETHOD IsBlock(PRInt32 aId, PRBool& aIsBlock) const = 0; - - // Observer mechanism - NS_IMETHOD RegisterObserver(nsIElementObserver* aObserver, - const nsAString& aTopic, - const eHTMLTags* aTags = nsnull) = 0; - - NS_IMETHOD UnregisterObserver(nsIElementObserver* aObserver, - const nsAString& aTopic) = 0; - NS_IMETHOD GetTopicObservers(const nsAString& aTopic, - nsIObserverEntry** aEntry) = 0; - - virtual nsresult CheckQName(const nsASingleFragmentString& aQName, - PRBool aNamespaceAware, - const PRUnichar** aColon) = 0; - virtual PRBool IsXMLLetter(PRUnichar aChar) = 0; - virtual PRBool IsXMLNCNameChar(PRUnichar aChar) = 0; -}; - -#endif // nsIParserService_h__ diff --git a/htmlparser/public/nsITokenizer.h b/htmlparser/public/nsITokenizer.h deleted file mode 100644 index 11b243770255..000000000000 --- a/htmlparser/public/nsITokenizer.h +++ /dev/null @@ -1,108 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - - -/** - * MODULE NOTES: - * @update gess 4/1/98 - * - */ - -#ifndef __NSITOKENIZER__ -#define __NSITOKENIZER__ - -#include "nsISupports.h" -#include "prtypes.h" - -class CToken; -class nsScanner; -class nsDeque; -class nsTokenAllocator; - -#define NS_ITOKENIZER_IID \ - {0xe4238ddc, 0x9eb6, 0x11d2, {0xba, 0xa5, 0x0, 0x10, 0x4b, 0x98, 0x3f, 0xd4 }} - -/** - * This interface is used as a callback to objects interested - * in observing the token stream created from the parse process. 
- */ -class nsITokenObserver { -public: - virtual PRBool operator()(CToken* aToken)=0; -}; - -/*************************************************************** - Notes: - ***************************************************************/ - - -class nsITokenizer : public nsISupports { -public: - NS_DEFINE_STATIC_IID_ACCESSOR(NS_ITOKENIZER_IID) - - NS_IMETHOD WillTokenize(PRBool aIsFinalChunk,nsTokenAllocator* aTokenAllocator)=0; - NS_IMETHOD ConsumeToken(nsScanner& aScanner,PRBool& aFlushTokens)=0; - NS_IMETHOD DidTokenize(PRBool aIsFinalChunk)=0; - - NS_IMETHOD_(CToken*) PushTokenFront(CToken* aToken)=0; - NS_IMETHOD_(CToken*) PushToken(CToken* aToken)=0; - NS_IMETHOD_(CToken*) PopToken(void)=0; - NS_IMETHOD_(CToken*) PeekToken(void)=0; - NS_IMETHOD_(CToken*) GetTokenAt(PRInt32 anIndex)=0; - NS_IMETHOD_(PRInt32) GetCount(void)=0; - NS_IMETHOD_(nsTokenAllocator*) GetTokenAllocator(void)=0; - NS_IMETHOD_(void) PrependTokens(nsDeque& aDeque)=0; - NS_IMETHOD CopyState(nsITokenizer* aTokenizer) = 0; - -}; - -#define NS_DECL_NSITOKENIZER \ - NS_IMETHOD WillTokenize(PRBool aIsFinalChunk,nsTokenAllocator* aTokenAllocator);\ - NS_IMETHOD ConsumeToken(nsScanner& aScanner,PRBool& aFlushTokens);\ - NS_IMETHOD DidTokenize(PRBool aIsFinalChunk);\ - NS_IMETHOD_(CToken*) PushTokenFront(CToken* aToken);\ - NS_IMETHOD_(CToken*) PushToken(CToken* aToken);\ - NS_IMETHOD_(CToken*) PopToken(void);\ - NS_IMETHOD_(CToken*) PeekToken(void);\ - NS_IMETHOD_(CToken*) GetTokenAt(PRInt32 anIndex);\ - NS_IMETHOD_(PRInt32) GetCount(void);\ - NS_IMETHOD_(nsTokenAllocator*) GetTokenAllocator(void);\ - NS_IMETHOD_(void) PrependTokens(nsDeque& aDeque);\ - NS_IMETHOD CopyState(nsITokenizer* aTokenizer); - - -#endif diff --git a/htmlparser/public/nsParserCIID.h b/htmlparser/public/nsParserCIID.h deleted file mode 100644 index a3758cc6f9d3..000000000000 --- a/htmlparser/public/nsParserCIID.h +++ /dev/null @@ -1,83 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* 
***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -#ifndef nsParserCIID_h__ -#define nsParserCIID_h__ - -#include "nsISupports.h" -#include "nsIFactory.h" -#include "nsIComponentManager.h" - -// {2ce606b0-bee6-11d1-aad9-00805f8a3e14} -#define NS_PARSER_CID \ -{ 0x2ce606b0, 0xbee6, 0x11d1, { 0xaa, 0xd9, 0x0, 0x80, 0x5f, 0x8a, 0x3e, 0x14 } } - -// XXX: This object should not be exposed outside of the parser. -// Remove when CNavDTD subclasses do not need access -#define NS_PARSER_NODE_IID \ - {0x9039c670, 0x2717, 0x11d2, \ - {0x92, 0x46, 0x00, 0x80, 0x5f, 0x8a, 0x7a, 0xb6}} - -// {a6cf9107-15b3-11d2-932e-00805f8add32} -#define NS_CNAVDTD_CID \ -{ 0xa6cf9107, 0x15b3, 0x11d2, { 0x93, 0x2e, 0x0, 0x80, 0x5f, 0x8a, 0xdd, 0x32 } } - -// Class IID for the logging sink -#define NS_LOGGING_SINK_CID \ - {0xa6cf9060, 0x15b3, 0x11d2,{0x93, 0x2e, 0x00, 0x80, 0x5f, 0x8a, 0xdd, 0x32}} - -// {4611d482-960a-11d4-8eb0-b617661b6f7c} -#define NS_CTRANSITIONAL_DTD_CID \ -{ 0x4611d482, 0x960a, 0x11d4, { 0x8e, 0xb0, 0xb6, 0x17, 0x66, 0x1b, 0x6f, 0x7c } } - -// {8323FAD0-2102-11d4-8142-000064657374} -#define NS_VIEWSOURCE_DTD_CID \ -{ 0x8323fad0, 0x2102, 0x11d4, { 0x81, 0x42, 0x0, 0x0, 0x64, 0x65, 0x73, 0x74 } } - -// {FFF4FBE9-528A-4b37-819D-FC18F3A401A7} -#define NS_EXPAT_DRIVER_CID \ -{ 0xfff4fbe9, 0x528a, 0x4b37, { 0x81, 0x9d, 0xfc, 0x18, 0xf3, 0xa4, 0x1, 0xa7 } } - -// {a6cf910f-15b3-11d2-932e-00805f8add32} -#define NS_HTMLCONTENTSINKSTREAM_CID \ -{ 0xa6cf910f, 0x15b3, 0x11d2, { 0x93, 0x2e, 0x0, 0x80, 0x5f, 0x8a, 0xdd, 0x32 } } - -// {a6cf9112-15b3-11d2-932e-00805f8add32} -#define NS_PARSERSERVICE_CID \ -{ 0xa6cf9112, 0x15b3, 0x11d2, { 0x93, 0x2e, 0x0, 0x80, 0x5f, 0x8a, 0xdd, 0x32 } } - -#endif diff --git a/htmlparser/public/nsParserError.h b/htmlparser/public/nsParserError.h deleted file mode 100644 index 8260ccc4fe7b..000000000000 --- a/htmlparser/public/nsParserError.h +++ /dev/null @@ -1,64 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** 
BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - - -/** - * MODULE NOTES: - * @update nra 3/3/99 - * - * nsParserError structifies the notion of a parser error. 
- */ - - -#ifndef PARSERERROR__ -#define PARSERERROR__ - -#include "prtypes.h" -#include "nsString.h" - -typedef struct _nsParserError { - PRInt32 code; - PRInt32 lineNumber; - PRInt32 colNumber; - nsString description; - nsString sourceLine; - nsString sourceURL; -} nsParserError; - -#endif - - diff --git a/htmlparser/public/nsScannerString.h b/htmlparser/public/nsScannerString.h deleted file mode 100644 index ea8a0cf15319..000000000000 --- a/htmlparser/public/nsScannerString.h +++ /dev/null @@ -1,583 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* vim:set ts=2 sw=2 sts=2 et cindent: */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla. - * - * The Initial Developer of the Original Code is IBM Corporation. - * Portions created by IBM Corporation are Copyright (C) 2003 - * IBM Corporation. All Rights Reserved. - * - * Contributor(s): - * Darin Fisher - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef nsScannerString_h___ -#define nsScannerString_h___ - -#include "nsString.h" -#include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator -#include "prclist.h" - - - /** - * NOTE: nsScannerString (and the other classes defined in this file) are - * not related to nsAString or any of the other xpcom/string classes. - * - * nsScannerString is based on the nsSlidingString implementation that used - * to live in xpcom/string. Now that nsAString is limited to representing - * only single fragment strings, nsSlidingString can no longer be used. - * - * An advantage to this design is that it does not employ any virtual - * functions. - * - * This file uses SCC-style indenting in deference to the nsSlidingString - * code from which this code is derived ;-) - */ - -class nsScannerIterator; -class nsScannerSubstring; -class nsScannerString; - - - /** - * nsScannerBufferList - * - * This class maintains a list of heap-allocated Buffer objects. The buffers - * are maintained in a circular linked list. Each buffer has a usage count - * that is decremented by the owning nsScannerSubstring. - * - * The buffer list itself is reference counted. This allows the buffer list - * to be shared by multiple nsScannerSubstring objects. The reference - * counting is not threadsafe, which is not at all a requirement. 
- * - * When a nsScannerSubstring releases its reference to a buffer list, it - * decrements the usage count of the first buffer in the buffer list that it - * was referencing. It informs the buffer list that it can discard buffers - * starting at that prefix. The buffer list will do so if the usage count of - * that buffer is 0 and if it is the first buffer in the list. It will - * continue to prune buffers starting from the front of the buffer list until - * it finds a buffer that has a usage count that is non-zero. - */ -class nsScannerBufferList - { - public: - - /** - * Buffer objects are directly followed by a data segment. The start - * of the data segment is determined by increment the |this| pointer - * by 1 unit. - */ - class Buffer : public PRCList - { - public: - - void IncrementUsageCount() { ++mUsageCount; } - void DecrementUsageCount() { --mUsageCount; } - - PRBool IsInUse() const { return mUsageCount != 0; } - - const PRUnichar* DataStart() const { return (const PRUnichar*) (this+1); } - PRUnichar* DataStart() { return ( PRUnichar*) (this+1); } - - const PRUnichar* DataEnd() const { return mDataEnd; } - PRUnichar* DataEnd() { return mDataEnd; } - - const Buffer* Next() const { return NS_STATIC_CAST(const Buffer*, next); } - Buffer* Next() { return NS_STATIC_CAST( Buffer*, next); } - - const Buffer* Prev() const { return NS_STATIC_CAST(const Buffer*, prev); } - Buffer* Prev() { return NS_STATIC_CAST( Buffer*, prev); } - - PRUint32 DataLength() const { return mDataEnd - DataStart(); } - void SetDataLength(PRUint32 len) { mDataEnd = DataStart() + len; } - - private: - - friend class nsScannerBufferList; - - PRInt32 mUsageCount; - PRUnichar* mDataEnd; - }; - - /** - * Position objects serve as lightweight pointers into a buffer list. - * The mPosition member must be contained with mBuffer->DataStart() - * and mBuffer->DataEnd(). 
- */ - class Position - { - public: - - Position() {} - - Position( Buffer* buffer, PRUnichar* position ) - : mBuffer(buffer) - , mPosition(position) - {} - - inline - Position( const nsScannerIterator& aIter ); - - inline - Position& operator=( const nsScannerIterator& aIter ); - - static size_t Distance( const Position& p1, const Position& p2 ); - - Buffer* mBuffer; - PRUnichar* mPosition; - }; - - static Buffer* AllocBufferFromString( const nsAString& ); - static Buffer* AllocBuffer( PRUint32 capacity ); // capacity = number of chars - - nsScannerBufferList( Buffer* buf ) - : mRefCnt(0) - { - PR_INIT_CLIST(&mBuffers); - PR_APPEND_LINK(buf, &mBuffers); - } - - void AddRef() { ++mRefCnt; } - void Release() { if (--mRefCnt == 0) delete this; } - - void Append( Buffer* buf ) { PR_APPEND_LINK(buf, &mBuffers); } - void InsertAfter( Buffer* buf, Buffer* prev ) { PR_INSERT_AFTER(buf, prev); } - void SplitBuffer( const Position& ); - void DiscardUnreferencedPrefix( Buffer* ); - - Buffer* Head() { return NS_STATIC_CAST( Buffer*, PR_LIST_HEAD(&mBuffers)); } - const Buffer* Head() const { return NS_STATIC_CAST(const Buffer*, PR_LIST_HEAD(&mBuffers)); } - - Buffer* Tail() { return NS_STATIC_CAST( Buffer*, PR_LIST_TAIL(&mBuffers)); } - const Buffer* Tail() const { return NS_STATIC_CAST(const Buffer*, PR_LIST_TAIL(&mBuffers)); } - - private: - - friend class nsScannerSubstring; - - ~nsScannerBufferList() { ReleaseAll(); } - void ReleaseAll(); - - PRInt32 mRefCnt; - PRCList mBuffers; - }; - - - /** - * nsScannerFragment represents a "slice" of a Buffer object. - */ -struct nsScannerFragment - { - typedef nsScannerBufferList::Buffer Buffer; - - const Buffer* mBuffer; - const PRUnichar* mFragmentStart; - const PRUnichar* mFragmentEnd; - }; - - - /** - * nsScannerSubstring is the base class for nsScannerString. It provides - * access to iterators and methods to bind the substring to another - * substring or nsAString instance. - * - * This class owns the buffer list. 
- */ -class nsScannerSubstring - { - public: - typedef nsScannerBufferList::Buffer Buffer; - typedef nsScannerBufferList::Position Position; - typedef PRUint32 size_type; - - nsScannerSubstring(); - nsScannerSubstring( const nsAString& s ); - - ~nsScannerSubstring(); - - nsScannerIterator& BeginReading( nsScannerIterator& iter ) const; - nsScannerIterator& EndReading( nsScannerIterator& iter ) const; - - size_type Length() const { return mLength; } - - PRInt32 CountChar( PRUnichar ) const; - - void Rebind( const nsScannerSubstring&, const nsScannerIterator&, const nsScannerIterator& ); - void Rebind( const nsAString& ); - - const nsString& AsString() const; - - PRBool GetNextFragment( nsScannerFragment& ) const; - PRBool GetPrevFragment( nsScannerFragment& ) const; - - static inline Buffer* AllocBufferFromString( const nsAString& aStr ) { return nsScannerBufferList::AllocBufferFromString(aStr); } - static inline Buffer* AllocBuffer( size_type aCapacity ) { return nsScannerBufferList::AllocBuffer(aCapacity); } - - protected: - - void acquire_ownership_of_buffer_list() const - { - mBufferList->AddRef(); - mStart.mBuffer->IncrementUsageCount(); - } - - void release_ownership_of_buffer_list() - { - if (mBufferList) - { - mStart.mBuffer->DecrementUsageCount(); - mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer); - mBufferList->Release(); - } - } - - void init_range_from_buffer_list() - { - mStart.mBuffer = mBufferList->Head(); - mStart.mPosition = mStart.mBuffer->DataStart(); - - mEnd.mBuffer = mBufferList->Tail(); - mEnd.mPosition = mEnd.mBuffer->DataEnd(); - - mLength = Position::Distance(mStart, mEnd); - } - - Position mStart; - Position mEnd; - nsScannerBufferList *mBufferList; - size_type mLength; - - // these fields are used to implement AsString - nsString mFlattenedRep; - PRBool mIsDirty; - }; - - - /** - * nsScannerString provides methods to grow and modify a buffer list. 
- */ -class nsScannerString : public nsScannerSubstring - { - public: - - nsScannerString( Buffer* ); - - // you are giving ownership to the string, it takes and keeps your - // buffer, deleting it when done. - // Use AllocBuffer or AllocBufferFromString to create a Buffer object - // for use with this function. - void AppendBuffer( Buffer* ); - - void DiscardPrefix( const nsScannerIterator& ); - // any other way you want to do this? - - void UngetReadable(const nsAString& aReadable, const nsScannerIterator& aCurrentPosition); - void ReplaceCharacter(nsScannerIterator& aPosition, PRUnichar aChar); - }; - - - /** - * nsScannerIterator works just like nsReadingIterator except that - * it knows how to iterate over a list of scanner buffers. - */ -class nsScannerIterator - { - public: - typedef nsScannerIterator self_type; - typedef ptrdiff_t difference_type; - typedef PRUnichar value_type; - typedef const PRUnichar* pointer; - typedef const PRUnichar& reference; - typedef nsScannerSubstring::Buffer Buffer; - - protected: - - nsScannerFragment mFragment; - const PRUnichar* mPosition; - const nsScannerSubstring* mOwner; - - friend class nsScannerSubstring; - - public: - nsScannerIterator() {} - // nsScannerIterator( const nsScannerIterator& ); // auto-generated copy-constructor OK - // nsScannerIterator& operator=( const nsScannerIterator& ); // auto-generated copy-assignment operator OK - - inline void normalize_forward(); - inline void normalize_backward(); - - pointer get() const - { - return mPosition; - } - - PRUnichar operator*() const - { - return *get(); - } - - const nsScannerFragment& fragment() const - { - return mFragment; - } - - const Buffer* buffer() const - { - return mFragment.mBuffer; - } - - self_type& operator++() - { - ++mPosition; - normalize_forward(); - return *this; - } - - self_type operator++( int ) - { - self_type result(*this); - ++mPosition; - normalize_forward(); - return result; - } - - self_type& operator--() - { - normalize_backward(); 
- --mPosition; - return *this; - } - - self_type operator--( int ) - { - self_type result(*this); - normalize_backward(); - --mPosition; - return result; - } - - difference_type size_forward() const - { - return mFragment.mFragmentEnd - mPosition; - } - - difference_type size_backward() const - { - return mPosition - mFragment.mFragmentStart; - } - - self_type& advance( difference_type n ) - { - while ( n > 0 ) - { - difference_type one_hop = NS_MIN(n, size_forward()); - - NS_ASSERTION(one_hop>0, "Infinite loop: can't advance a reading iterator beyond the end of a string"); - // perhaps I should |break| if |!one_hop|? - - mPosition += one_hop; - normalize_forward(); - n -= one_hop; - } - - while ( n < 0 ) - { - normalize_backward(); - difference_type one_hop = NS_MAX(n, -size_backward()); - - NS_ASSERTION(one_hop<0, "Infinite loop: can't advance (backward) a reading iterator beyond the end of a string"); - // perhaps I should |break| if |!one_hop|? - - mPosition += one_hop; - n -= one_hop; - } - - return *this; - } - }; - - -inline -PRBool -SameFragment( const nsScannerIterator& a, const nsScannerIterator& b ) - { - return a.fragment().mFragmentStart == b.fragment().mFragmentStart; - } - - - /** - * this class is needed in order to make use of the methods in nsAlgorithm.h - */ -NS_SPECIALIZE_TEMPLATE -struct nsCharSourceTraits - { - typedef nsScannerIterator::difference_type difference_type; - - static - PRUint32 - readable_distance( const nsScannerIterator& first, const nsScannerIterator& last ) - { - return PRUint32(SameFragment(first, last) ? 
last.get() - first.get() : first.size_forward()); - } - - static - const nsScannerIterator::value_type* - read( const nsScannerIterator& iter ) - { - return iter.get(); - } - - static - void - advance( nsScannerIterator& s, difference_type n ) - { - s.advance(n); - } - }; - - - /** - * inline methods follow - */ - -inline -void -nsScannerIterator::normalize_forward() - { - while (mPosition == mFragment.mFragmentEnd && mOwner->GetNextFragment(mFragment)) - mPosition = mFragment.mFragmentStart; - } - -inline -void -nsScannerIterator::normalize_backward() - { - while (mPosition == mFragment.mFragmentStart && mOwner->GetPrevFragment(mFragment)) - mPosition = mFragment.mFragmentEnd; - } - -inline -PRBool -operator==( const nsScannerIterator& lhs, const nsScannerIterator& rhs ) - { - return lhs.get() == rhs.get(); - } - -inline -PRBool -operator!=( const nsScannerIterator& lhs, const nsScannerIterator& rhs ) - { - return lhs.get() != rhs.get(); - } - - -inline -nsScannerBufferList::Position::Position(const nsScannerIterator& aIter) - : mBuffer(NS_CONST_CAST(Buffer*, aIter.buffer())) - , mPosition(NS_CONST_CAST(PRUnichar*, aIter.get())) - {} - -inline -nsScannerBufferList::Position& -nsScannerBufferList::Position::operator=(const nsScannerIterator& aIter) - { - mBuffer = NS_CONST_CAST(Buffer*, aIter.buffer()); - mPosition = NS_CONST_CAST(PRUnichar*, aIter.get()); - return *this; - } - - - /** - * scanner string utils - * - * These methods mimic the API provided by nsReadableUtils in xpcom/string. - * Here we provide only the methods that the htmlparser module needs. 
- */ - -inline -size_t -Distance( const nsScannerIterator& aStart, const nsScannerIterator& aEnd ) - { - typedef nsScannerBufferList::Position Position; - return Position::Distance(Position(aStart), Position(aEnd)); - } - -void -CopyUnicodeTo( const nsScannerIterator& aSrcStart, - const nsScannerIterator& aSrcEnd, - nsAString& aDest ); - -inline -void -CopyUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest ) - { - nsScannerIterator begin, end; - CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest); - } - -void -AppendUnicodeTo( const nsScannerIterator& aSrcStart, - const nsScannerIterator& aSrcEnd, - nsAString& aDest ); - -inline -void -AppendUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest ) - { - nsScannerIterator begin, end; - AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest); - } - -PRBool -FindCharInReadable( PRUnichar aChar, - nsScannerIterator& aStart, - const nsScannerIterator& aEnd ); - -PRBool -FindInReadable( const nsAString& aPattern, - nsScannerIterator& aStart, - nsScannerIterator& aEnd, - const nsStringComparator& = nsDefaultStringComparator() ); - -PRBool -RFindInReadable( const nsAString& aPattern, - nsScannerIterator& aStart, - nsScannerIterator& aEnd, - const nsStringComparator& = nsDefaultStringComparator() ); - -inline -PRBool -CaseInsensitiveFindInReadable( const nsAString& aPattern, - nsScannerIterator& aStart, - nsScannerIterator& aEnd ) - { - return FindInReadable(aPattern, aStart, aEnd, - nsCaseInsensitiveStringComparator()); - } - -#endif // !defined(nsScannerString_h___) diff --git a/htmlparser/public/nsToken.h b/htmlparser/public/nsToken.h deleted file mode 100644 index fc6c470cf17b..000000000000 --- a/htmlparser/public/nsToken.h +++ /dev/null @@ -1,296 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public 
License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - - -/** - * MODULE NOTES: - * @update gess 4/1/98 - * - * This class is defines the basic notion of a token - * within our system. All other tokens are derived from - * this one. It offers a few basic interfaces, but the - * most important is consume(). 
The consume() method gets - * called during the tokenization process when an instance - * of that particular token type gets detected in the - * input stream. - * - * CToken objects that are allocated from the heap _must_ be allocated - * using the nsTokenAllocator: the nsTokenAllocator object uses an - * arena to manage the tokens. - * - * The nsTokenAllocator object's arena implementation requires - * object size at destruction time to properly recycle the object; - * therefore, CToken::operator delete() is not public. Instead, - * heap-allocated tokens should be destroyed using the static - * Destroy() method, which accepts a token and the arena from which - * the token was allocated. - * - * Leaf classes (that are actually instantiated from the heap) must - * implement the SizeOf() method, which Destroy() uses to determine - * the size of the token in order to properly recycle it. - */ - - -#ifndef CTOKEN__ -#define CTOKEN__ - -#include "prtypes.h" -#include "nsString.h" -#include "nsError.h" -#include "nsFixedSizeAllocator.h" - -#define NS_HTMLTOKENS_NOT_AN_ENTITY \ - NS_ERROR_GENERATE_SUCCESS(NS_ERROR_MODULE_HTMLPARSER,2000) - -class nsScanner; -class nsTokenAllocator; - -enum eContainerInfo { - eWellFormed, - eMalformed, - eFormUnknown -}; - -/** - * Implement the SizeOf() method; leaf classes derived from CToken - * must declare this. - */ -#define CTOKEN_IMPL_SIZEOF \ -protected: \ - virtual size_t SizeOf() const { return sizeof(*this); } \ -public: - -/** - * Token objects represent sequences of characters as they - * are consumed from the input stream (URL). While they're - * pretty general in nature, we use subclasses (found in - * nsHTMLTokens.h) to define , , , - * , <&entity>, , and tokens. - * - * @update gess 3/25/98 - */ -class CToken { - public: - - enum eTokenOrigin {eSource,eResidualStyle}; - - protected: - - // nsTokenAllocator should be the only class that tries to - // allocate tokens from the heap. 
- friend class nsTokenAllocator; - - /** - * - * @update harishd 08/01/00 - * @param aSize - - * @param aArena - Allocate memory from this pool. - */ - static void * operator new (size_t aSize,nsFixedSizeAllocator& anArena) CPP_THROW_NEW - { - return anArena.Alloc(aSize); - } - - /** - * Hide operator delete; clients should use Destroy() instead. - */ - static void operator delete (void*,size_t) {} - - public: - /** - * destructor - * @update gess5/11/98 - */ - virtual ~CToken(); - - /** - * Destroy a token. - */ - static void Destroy(CToken* aToken,nsFixedSizeAllocator& aArenaPool) - { - size_t sz = aToken->SizeOf(); - aToken->~CToken(); - aArenaPool.Free(aToken, sz); - } - - /** - * Make a note on number of times you have been referenced - * @update harishd 08/02/00 - */ - void AddRef() { ++mUseCount; } - - /** - * Free yourself if no one is holding you. - * @update harishd 08/02/00 - */ - void Release(nsFixedSizeAllocator& aArenaPool) { - if(--mUseCount==0) - Destroy(this, aArenaPool); - } - - /** - * Default constructor - * @update gess7/21/98 - */ - CToken(PRInt32 aTag=0); - - /** - * Retrieve string value of the token - * @update gess5/11/98 - * @return reference to string containing string value - */ - virtual const nsAString& GetStringValue(void) = 0; - - /** - * Get string of full contents, suitable for debug dump. - * It should look exactly like the input source. 
- * @update gess5/11/98 - * @return reference to string containing string value - */ - virtual void GetSource(nsString& anOutputString); - - /** @update harishd 03/23/00 - * @return reference to string containing string value - */ - virtual void AppendSourceTo(nsAString& anOutputString); - - /** - * Sets the ordinal value of this token (not currently used) - * @update gess5/11/98 - * @param value is the new ord value for this token - */ - void SetTypeID(PRInt32 aValue) { - mTypeID = aValue; - } - - /** - * Getter which retrieves the current ordinal value for this token - * @update gess5/11/98 - * @return current ordinal value - */ - virtual PRInt32 GetTypeID(void); - - /** - * Getter which retrieves the current attribute count for this token - * @update gess5/11/98 - * @return current attribute count - */ - virtual PRInt16 GetAttributeCount(void); - - /** - * Causes token to consume data from given scanner. - * Note that behavior varies wildly between CToken subclasses. - * @update gess5/11/98 - * @param aChar -- most recent char consumed - * @param aScanner -- input source where token should get data - * @return error code (0 means ok) - */ - virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); - - /** - * Getter which retrieves type of token - * @update gess5/11/98 - * @return int containing token type - */ - virtual PRInt32 GetTokenType(void); - - /** - * For tokens who care, this can tell us whether the token is - * well formed or not. - * - * @update gess 8/30/00 - * @return PR_FALSE; subclasses MUST override if they care. 
- */ - virtual PRBool IsWellFormed(void) const {return PR_FALSE;} - - virtual PRBool IsEmpty(void) { return PR_FALSE; } - - /** - * If aValue is TRUE then the token represents a short-hand tag - */ - virtual void SetEmpty(PRBool aValue) { return ; } - - PRInt32 GetNewlineCount() - { - return mNewlineCount; - } - - void SetNewlineCount(PRInt32 aCount) - { - mNewlineCount = aCount; - } - - PRInt32 GetLineNumber() - { - return mLineNumber; - } - - void SetLineNumber(PRInt32 aLineNumber) - { - mLineNumber = mLineNumber == 0 ? aLineNumber : mLineNumber; - } - - void SetAttributeCount(PRInt16 aValue) { mAttrCount = aValue; } - - /** - * perform self test. - * @update gess5/11/98 - */ - virtual void SelfTest(void); - - static int GetTokenCount(); - - - -protected: - /** - * Returns the size of the token object. - */ - virtual size_t SizeOf() const = 0; - - PRInt32 mTypeID; - PRInt32 mUseCount; - PRInt32 mNewlineCount; - PRInt32 mLineNumber; - PRInt16 mAttrCount; -}; - - - -#endif - - diff --git a/htmlparser/robot/.cvsignore b/htmlparser/robot/.cvsignore deleted file mode 100644 index 4a7ba5f079bf..000000000000 --- a/htmlparser/robot/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile -htmlrobot diff --git a/htmlparser/robot/nsDebugRobot.cpp b/htmlparser/robot/nsDebugRobot.cpp deleted file mode 100644 index 5bb2d544ed10..000000000000 --- a/htmlparser/robot/nsDebugRobot.cpp +++ /dev/null @@ -1,356 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Pierre Phaneuf - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ -#include "nsIRobotSink.h" -#include "nsIRobotSinkObserver.h" -#include "nsIParser.h" -#include "nsIDocShell.h" -#include "nsIWebNavigation.h" -#include "nsIWebProgress.h" -#include "nsIWebProgressListener.h" -#include "nsWeakReference.h" -#include "nsVoidArray.h" -#include "nsString.h" -#include "nsReadableUtils.h" -#include "nsIURL.h" -#include "nsIServiceManager.h" -#include "nsIURL.h" -#include "nsIIOService.h" -#include "nsNetCID.h" -#include "nsIComponentManager.h" -#include "nsParserCIID.h" -#include "nsIInterfaceRequestor.h" -#include "nsIInterfaceRequestorUtils.h" - -static NS_DEFINE_CID(kIOServiceCID, NS_IOSERVICE_CID); - -class RobotSinkObserver : public nsIRobotSinkObserver { -public: - RobotSinkObserver() { - } - - virtual ~RobotSinkObserver() { - } - - NS_DECL_ISUPPORTS - - NS_IMETHOD ProcessLink(const nsString& aURLSpec); - NS_IMETHOD VerifyDirectory (const char * verify_dir); - -}; - -static nsVoidArray * g_workList; -static nsVoidArray * g_duplicateList; -static int g_iProcessed; -static int g_iMaxProcess = 5000; -static PRBool g_bHitTop; -static PRBool g_bReadyForNextUrl; - -NS_IMPL_ISUPPORTS1(RobotSinkObserver, nsIRobotSinkObserver) - -NS_IMETHODIMP RobotSinkObserver::VerifyDirectory(const char * verify_dir) -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSinkObserver::ProcessLink(const nsString& aURLSpec) -{ - if (!g_bHitTop) { - - nsAutoString str; - // Geez this is ugly. 
temporary hack to only process html files - str.Truncate(); - nsString(aURLSpec).Right(str,1); - if (!str.Equals(NS_LITERAL_STRING("/"))) - { - str.Truncate(); - nsString(aURLSpec).Right(str,4); - if (!str.Equals(NS_LITERAL_STRING("html"))) - { - str.Truncate(); - nsString(aURLSpec).Right(str,3); - if (!str.Equals(NS_LITERAL_STRING("htm"))) - return NS_OK; - } - } - PRInt32 nCount = g_duplicateList->Count(); - if (nCount > 0) - { - for (PRInt32 n = 0; n < nCount; ++n) - { - nsString * pstr = (nsString *)g_duplicateList->ElementAt(n); - if (pstr->Equals(aURLSpec)) { - fputs ("Robot: (duplicate '",stdout); - fputs (NS_LossyConvertUCS2toASCII(aURLSpec).get(),stdout); - fputs ("')\n",stdout); - return NS_OK; - } - } - } - g_duplicateList->AppendElement(new nsString(aURLSpec)); - str.Truncate(); - nsString(aURLSpec).Left(str,5); - if (str.Equals(NS_LITERAL_STRING("http:"))) { - ++g_iProcessed; - if (g_iProcessed == (g_iMaxProcess > 0 ? g_iMaxProcess-1 : 0)) - g_bHitTop = PR_TRUE; - g_workList->AppendElement(new nsString(aURLSpec)); - } - else { - fputs ("Robot: (cannot process URL types '",stdout); - fputs (NS_LossyConvertUCS2toASCII(aURLSpec).get(),stdout); - fputs ("')\n",stdout); - } - } - return NS_OK; -} - -extern "C" NS_EXPORT void SetVerificationDirectory(char * verify_dir); - -class CStreamListener: public nsIWebProgressListener, - public nsSupportsWeakReference -{ -public: - CStreamListener() { - } - - virtual ~CStreamListener() { - } - - NS_DECL_ISUPPORTS - - // nsIWebProgressListener - NS_DECL_NSIWEBPROGRESSLISTENER -}; - -// nsIWebProgressListener implementation -NS_IMETHODIMP -CStreamListener::OnStateChange(nsIWebProgress* aWebProgress, - nsIRequest *aRequest, - PRUint32 progressStateFlags, - nsresult aStatus) -{ - if (progressStateFlags & nsIWebProgressListener::STATE_IS_DOCUMENT) - if (progressStateFlags & nsIWebProgressListener::STATE_STOP) { - fputs("done.\n",stdout); - g_bReadyForNextUrl = PR_TRUE; - } - return NS_OK; -} - -NS_IMETHODIMP 
-CStreamListener::OnProgressChange(nsIWebProgress *aWebProgress, - nsIRequest *aRequest, - PRInt32 aCurSelfProgress, - PRInt32 aMaxSelfProgress, - PRInt32 aCurTotalProgress, - PRInt32 aMaxTotalProgress) -{ - NS_NOTREACHED("notification excluded in AddProgressListener(...)"); - return NS_OK; -} - -NS_IMETHODIMP -CStreamListener::OnLocationChange(nsIWebProgress* aWebProgress, - nsIRequest* aRequest, - nsIURI *location) -{ - NS_NOTREACHED("notification excluded in AddProgressListener(...)"); - return NS_OK; -} - - -NS_IMETHODIMP -CStreamListener::OnStatusChange(nsIWebProgress* aWebProgress, - nsIRequest* aRequest, - nsresult aStatus, - const PRUnichar* aMessage) -{ - NS_NOTREACHED("notification excluded in AddProgressListener(...)"); - return NS_OK; -} - - -NS_IMETHODIMP -CStreamListener::OnSecurityChange(nsIWebProgress *aWebProgress, - nsIRequest *aRequest, - PRUint32 state) -{ - NS_NOTREACHED("notification excluded in AddProgressListener(...)"); - return NS_OK; -} - -NS_IMPL_ISUPPORTS2(CStreamListener, - nsIWebProgressListener, - nsISupportsWeakReference) - -extern "C" NS_EXPORT void DumpVectorRecord(void); -//---------------------------------------------------------------------- -extern "C" NS_EXPORT int DebugRobot( - nsVoidArray * workList, - nsIDocShell * docShell, - int iMaxLoads, - char * verify_dir, - void (*yieldProc )(const char *) - ) -{ - int iCount = 1; - CStreamListener * pl = new CStreamListener; - NS_ADDREF(pl); - - if (nsnull==workList) - return -1; - g_iMaxProcess = iMaxLoads; - g_iProcessed = 0; - g_bHitTop = PR_FALSE; - g_duplicateList = new nsVoidArray(); - RobotSinkObserver* myObserver = new RobotSinkObserver(); - NS_ADDREF(myObserver); - g_workList = workList; - - for (;;) { - PRInt32 n = g_workList->Count(); - if (0 == n) { - break; - } - nsString* urlName = (nsString*) g_workList->ElementAt(n - 1); - g_workList->RemoveElementAt(n - 1); - - // Create url - nsIURI* url; - nsresult rv; - nsCOMPtr service(do_GetService(kIOServiceCID, &rv)); - if 
(NS_FAILED(rv)) return rv; - - nsIURI *uri = nsnull; - NS_ConvertUCS2toUTF8 uriStr(*urlName); - rv = service->NewURI(uriStr, nsnull, nsnull, &uri); - if (NS_FAILED(rv)) return rv; - - rv = uri->QueryInterface(NS_GET_IID(nsIURI), (void**)&url); - NS_RELEASE(uri); - if (NS_OK != rv) { - printf("invalid URL: '"); - fputs(uriStr.get(), stdout); - printf("'\n"); - NS_RELEASE(myObserver); - return -1; - } - - char str_num[25]; - sprintf (str_num,"%d",iCount++); - fputs ("Robot: parsing(",stdout); - fputs (str_num,stdout); - fputs (") ",stdout); - fputs (NS_LossyConvertUCS2toASCII(*urlName).get(),stdout); - fputs ("...",stdout); - - delete urlName; - - nsIParser* parser; - - static NS_DEFINE_IID(kCParserIID, NS_IPARSER_IID); - static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID); - - rv = nsComponentManager::CreateInstance(kCParserCID, - nsnull, - kCParserIID, - (void **)&parser); - - if (NS_OK != rv) { - printf("can't make parser\n"); - NS_RELEASE(myObserver); - return -1; - } - - nsIRobotSink* sink; - rv = NS_NewRobotSink(&sink); - if (NS_OK != rv) { - printf("can't make parser\n"); - NS_RELEASE(myObserver); - return -1; - } - sink->Init(url); - sink->AddObserver(myObserver); - - parser->SetContentSink(sink); - g_bReadyForNextUrl = PR_FALSE; - - parser->Parse(url, nsnull,PR_TRUE);/* XXX hook up stream listener here! */ - while (!g_bReadyForNextUrl) { - if (yieldProc != NULL) { - nsCAutoString spec; - (void)url->GetSpec(spec); - (*yieldProc)(spec.get()); - } - } - g_bReadyForNextUrl = PR_FALSE; - if (docShell) { - nsCOMPtr progress(do_GetInterface(docShell, &rv)); - if (NS_FAILED(rv)) return rv; - - (void) progress->AddProgressListener(pl, nsIWebProgress::NOTIFY_STATE_DOCUMENT); - - nsCAutoString spec; - (void)url->GetSpec(spec); - NS_ConvertUTF8toUCS2 theSpec(spec); - nsCOMPtr webNav(do_QueryInterface(docShell)); - webNav->LoadURI(theSpec.get(), - nsIWebNavigation::LOAD_FLAGS_NONE, - nsnull, - nsnull, - nsnull);/* XXX hook up stream listener here! 
*/ - while (!g_bReadyForNextUrl) { - if (yieldProc != NULL) { - (void)url->GetSpec(spec); - (*yieldProc)(spec.get()); - } - } - } - - NS_RELEASE(sink); - NS_RELEASE(parser); - NS_RELEASE(url); - } - - fputs ("Robot completed.\n", stdout); - - NS_RELEASE(pl); - NS_RELEASE(myObserver); - - return 0; -} diff --git a/htmlparser/robot/nsIRobotSink.h b/htmlparser/robot/nsIRobotSink.h deleted file mode 100644 index cd5d0b166375..000000000000 --- a/htmlparser/robot/nsIRobotSink.h +++ /dev/null @@ -1,59 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -#ifndef nsIRobotSink_h___ -#define nsIRobotSink_h___ - -#include "nsIHTMLContentSink.h" -class nsIURI; -class nsIRobotSinkObserver; - -/* 61256800-cfd8-11d1-9328-00805f8add32 */ -#define NS_IROBOTSINK_IID \ -{ 0x61256800, 0xcfd8, 0x11d1, \ - {0x93, 0x28, 0x00, 0x80, 0x5f, 0x8a, 0xdd, 0x32} } - -class nsIRobotSink : public nsIHTMLContentSink { -public: - NS_IMETHOD Init(nsIURI* aDocumentURL) = 0; - NS_IMETHOD AddObserver(nsIRobotSinkObserver* aObserver) = 0; - NS_IMETHOD RemoveObserver(nsIRobotSinkObserver* aObserver) = 0; - -}; - -extern nsresult NS_NewRobotSink(nsIRobotSink** aInstancePtrResult); - -#endif /* nsIRobotSink_h___ */ diff --git a/htmlparser/robot/nsIRobotSinkObserver.h b/htmlparser/robot/nsIRobotSinkObserver.h deleted file mode 100644 index 88b19aef080f..000000000000 --- a/htmlparser/robot/nsIRobotSinkObserver.h +++ /dev/null @@ -1,56 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ -#ifndef nsIRobotSinkObserver_h___ -#define nsIRobotSinkObserver_h___ - -#include "nsISupports.h" -class nsString; - -/* fab1d970-cfda-11d1-9328-00805f8add32 */ -#define NS_IROBOTSINKOBSERVER_IID \ -{ 0xfab1d970, 0xcfda, 0x11d1, \ - {0x93, 0x28, 0x00, 0x80, 0x5f, 0x8a, 0xdd, 0x32} } - -class nsIRobotSinkObserver : public nsISupports { -public: - NS_DEFINE_STATIC_IID_ACCESSOR(NS_IROBOTSINKOBSERVER_IID) - - NS_IMETHOD ProcessLink(const nsString& aURLSpec) = 0; - NS_IMETHOD VerifyDirectory(const char * verify_dir) = 0; -}; - -#endif /* nsIRobotSinkObserver_h___ */ diff --git a/htmlparser/robot/nsRobotSink.cpp b/htmlparser/robot/nsRobotSink.cpp deleted file mode 100644 index ef2ed62cd967..000000000000 --- a/htmlparser/robot/nsRobotSink.cpp +++ /dev/null @@ -1,401 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * Pierre Phaneuf - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -#include "nsIRobotSink.h" -#include "nsIRobotSinkObserver.h" -#include "nsIParserNode.h" -#include "nsIParser.h" -#include "nsString.h" -#include "nsReadableUtils.h" -#include "nsUnicharUtils.h" -#include "nsIURL.h" -#include "nsIURL.h" -#include "nsIServiceManager.h" -#include "nsIIOService.h" -#include "nsNetCID.h" -#include "nsCRT.h" -#include "nsVoidArray.h" -class nsIDocument; - -// TODO -// - add in base tag support -// - get links from other sources: -// - LINK tag -// - STYLE SRC -// - IMG SRC -// - LAYER SRC - -static NS_DEFINE_CID(kIOServiceCID, NS_IOSERVICE_CID); -static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); -static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID); -static NS_DEFINE_IID(kIRobotSinkIID, NS_IROBOTSINK_IID); - -class RobotSink : public nsIRobotSink { -public: - RobotSink(); - virtual ~RobotSink(); - - NS_DECL_AND_IMPL_ZEROING_OPERATOR_NEW - - // nsISupports - NS_DECL_ISUPPORTS - - // nsIHTMLContentSink - NS_IMETHOD SetTitle(const nsString& aValue); - NS_IMETHOD OpenHTML(const nsIParserNode& aNode); - 
NS_IMETHOD CloseHTML(); - NS_IMETHOD OpenHead(const nsIParserNode& aNode); - NS_IMETHOD CloseHead(); - NS_IMETHOD OpenBody(const nsIParserNode& aNode); - NS_IMETHOD CloseBody(); - NS_IMETHOD OpenForm(const nsIParserNode& aNode); - NS_IMETHOD CloseForm(); - NS_IMETHOD OpenMap(const nsIParserNode& aNode); - NS_IMETHOD CloseMap(); - NS_IMETHOD OpenFrameset(const nsIParserNode& aNode); - NS_IMETHOD CloseFrameset(); - NS_IMETHOD IsEnabled(PRInt32 aTag, PRBool* aReturn) { return NS_OK; } - NS_IMETHOD_(PRBool) IsFormOnStack() { return PR_FALSE; } - - NS_IMETHOD OpenContainer(const nsIParserNode& aNode); - NS_IMETHOD CloseContainer(const nsHTMLTag aTag); - NS_IMETHOD CloseTopmostContainer(); - NS_IMETHOD AddHeadContent(const nsIParserNode& aNode); - NS_IMETHOD AddLeaf(const nsIParserNode& aNode); - NS_IMETHOD AddComment(const nsIParserNode& aNode); - NS_IMETHOD AddProcessingInstruction(const nsIParserNode& aNode); - NS_IMETHOD AddDocTypeDecl(const nsIParserNode& aNode); - NS_IMETHOD WillBuildModel(void) { return NS_OK; } - NS_IMETHOD DidBuildModel(void) { return NS_OK; } - NS_IMETHOD WillInterrupt(void) { return NS_OK; } - NS_IMETHOD WillResume(void) { return NS_OK; } - NS_IMETHOD SetParser(nsIParser* aParser) { return NS_OK; } - NS_IMETHOD FlushPendingNotifications() { return NS_OK; } - NS_IMETHOD SetDocumentCharset(nsACString& aCharset) { return NS_OK; } - NS_IMETHOD WillProcessTokens(void) { return NS_OK; } - NS_IMETHOD DidProcessTokens(void) { return NS_OK; } - NS_IMETHOD WillProcessAToken(void) { return NS_OK; } - NS_IMETHOD DidProcessAToken(void) { return NS_OK; } - NS_IMETHOD NotifyTagObservers(nsIParserNode* aNode) { return NS_OK; } - - NS_IMETHOD BeginContext(PRInt32 aPosition){ return NS_OK; } - NS_IMETHOD EndContext(PRInt32 aPosition){ return NS_OK; } - - // nsIRobotSink - NS_IMETHOD Init(nsIURI* aDocumentURL); - NS_IMETHOD AddObserver(nsIRobotSinkObserver* aObserver); - NS_IMETHOD RemoveObserver(nsIRobotSinkObserver* aObserver); - - void ProcessLink(const 
nsString& aLink); - -protected: - nsIURI* mDocumentURL; - nsVoidArray mObservers; -}; - -nsresult NS_NewRobotSink(nsIRobotSink** aInstancePtrResult) -{ - RobotSink* it = new RobotSink(); - if(it) - return it->QueryInterface(kIRobotSinkIID, (void**) aInstancePtrResult); - return NS_OK; -} - -RobotSink::RobotSink() -{ -} - -RobotSink::~RobotSink() -{ - NS_IF_RELEASE(mDocumentURL); - PRInt32 i, n = mObservers.Count(); - for (i = 0; i < n; ++i) { - nsIRobotSinkObserver* cop = (nsIRobotSinkObserver*)mObservers.ElementAt(i); - NS_RELEASE(cop); - } -} - -NS_IMPL_ADDREF(RobotSink) - -NS_IMPL_RELEASE(RobotSink) - -NS_IMETHODIMP RobotSink::QueryInterface(REFNSIID aIID, void** aInstancePtr) -{ - if (NULL == aInstancePtr) { - return NS_ERROR_NULL_POINTER; - } - if (aIID.Equals(kIRobotSinkIID)) { - *aInstancePtr = (void*) this; - AddRef(); - return NS_OK; - } - if (aIID.Equals(kIHTMLContentSinkIID)) { - *aInstancePtr = (void*) this; - AddRef(); - return NS_OK; - } - if (aIID.Equals(kISupportsIID)) { - *aInstancePtr = (void*) ((nsISupports*)this); - AddRef(); - return NS_OK; - } - return NS_NOINTERFACE; -} - -NS_IMETHODIMP RobotSink::SetTitle(const nsString& aValue) -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSink::OpenHTML(const nsIParserNode& aNode) -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSink::CloseHTML() -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSink::OpenHead(const nsIParserNode& aNode) -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSink::CloseHead() -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSink::OpenBody(const nsIParserNode& aNode) -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSink::CloseBody() -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSink::OpenForm(const nsIParserNode& aNode) -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSink::CloseForm() -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSink::OpenMap(const nsIParserNode& aNode) -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSink::CloseMap() -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSink::OpenFrameset(const nsIParserNode& 
aNode) -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSink::CloseFrameset() -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSink::OpenContainer(const nsIParserNode& aNode) -{ - nsAutoString tmp; tmp.Assign(aNode.GetText()); - ToLowerCase(tmp); - if (tmp.Equals(NS_LITERAL_STRING("a"))) { - nsAutoString k, v; - PRInt32 ac = aNode.GetAttributeCount(); - for (PRInt32 i = 0; i < ac; ++i) { - // Get upper-cased key - const nsAString& key = aNode.GetKeyAt(i); - k.Assign(key); - ToLowerCase(k); - if (k.Equals(NS_LITERAL_STRING("href"))) { - // Get value and remove mandatory quotes - v.Truncate(); - v.Append(aNode.GetValueAt(i)); - PRUnichar first = v.First(); - if ((first == '"') || (first == '\'')) { - if (v.Last() == first) { - v.Cut(0, 1); - PRInt32 pos = v.Length() - 1; - if (pos >= 0) { - v.Cut(pos, 1); - } - } else { - // Mismatched quotes - leave them in - } - } - ProcessLink(v); - } - } - } - return NS_OK; -} - -NS_IMETHODIMP RobotSink::CloseContainer(const nsHTMLTag aTag) -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSink::CloseTopmostContainer() -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSink::AddHeadContent(const nsIParserNode& aNode) -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSink::AddLeaf(const nsIParserNode& aNode) -{ - return NS_OK; -} - -/** - * This gets called by the parsing system when we find a comment - * @update gess11/9/98 - * @param aNode contains a comment token - * @return error code - */ -NS_IMETHODIMP RobotSink::AddComment(const nsIParserNode& aNode) { - nsresult result= NS_OK; - return result; -} - -/** - * This gets called by the parsing system when we find a PI - * @update gess11/9/98 - * @param aNode contains a comment token - * @return error code - */ -NS_IMETHODIMP RobotSink::AddProcessingInstruction(const nsIParserNode& aNode) { - nsresult result= NS_OK; - return result; -} - -/** - * This gets called by the parser when it encounters - * a DOCTYPE declaration in the HTML document. 
- */ - -NS_IMETHODIMP -RobotSink::AddDocTypeDecl(const nsIParserNode& aNode) -{ - return NS_OK; -} - -NS_IMETHODIMP RobotSink::Init(nsIURI* aDocumentURL) -{ - NS_IF_RELEASE(mDocumentURL); - mDocumentURL = aDocumentURL; - NS_IF_ADDREF(aDocumentURL); - return NS_OK; -} - -NS_IMETHODIMP RobotSink::AddObserver(nsIRobotSinkObserver* aObserver) -{ - if (mObservers.AppendElement(aObserver)) { - NS_ADDREF(aObserver); - return NS_OK; - } - return NS_ERROR_OUT_OF_MEMORY; -} - -NS_IMETHODIMP RobotSink::RemoveObserver(nsIRobotSinkObserver* aObserver) -{ - if (mObservers.RemoveElement(aObserver)) { - NS_RELEASE(aObserver); - return NS_OK; - } - //XXX return NS_ERROR_NOT_FOUND; - return NS_OK; -} - -void RobotSink::ProcessLink(const nsString& aLink) -{ - nsAutoString absURLSpec; absURLSpec.Assign(aLink); - - // Make link absolute - // XXX base tag handling - nsIURI* docURL = mDocumentURL; - if (nsnull != docURL) { - nsIURI* absurl; - nsresult rv; - nsCOMPtr service(do_GetService(kIOServiceCID, &rv)); - if (NS_FAILED(rv)) return; - - nsIURI *uri = nsnull, *baseUri = nsnull; - - rv = mDocumentURL->QueryInterface(NS_GET_IID(nsIURI), (void**)&baseUri); - if (NS_FAILED(rv)) return; - - NS_ConvertUCS2toUTF8 uriStr(aLink); - rv = service->NewURI(uriStr, nsnull, baseUri, &uri); - NS_RELEASE(baseUri); - if (NS_FAILED(rv)) return; - - rv = uri->QueryInterface(NS_GET_IID(nsIURI), (void**)&absurl); - NS_RELEASE(uri); - - if (NS_OK == rv) { - absURLSpec.Truncate(); - nsCAutoString str; - absurl->GetSpec(str); - absURLSpec = NS_ConvertUTF8toUCS2(str); - } - } - - // Now give link to robot observers - PRInt32 i, n = mObservers.Count(); - for (i = 0; i < n; ++i) { - nsIRobotSinkObserver* cop = (nsIRobotSinkObserver*)mObservers.ElementAt(i); - cop->ProcessLink(absURLSpec); - } -} - - diff --git a/htmlparser/robot/test/.cvsignore b/htmlparser/robot/test/.cvsignore deleted file mode 100644 index 4a7ba5f079bf..000000000000 --- a/htmlparser/robot/test/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ 
-Makefile -htmlrobot diff --git a/htmlparser/robot/test/RobotMain.cpp b/htmlparser/robot/test/RobotMain.cpp deleted file mode 100644 index 785054b568d2..000000000000 --- a/htmlparser/robot/test/RobotMain.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include "nsXPCOM.h" -#include "nsVoidArray.h" -#include "nsString.h" -class nsIWebShell; - -extern "C" NS_EXPORT int DebugRobot(nsVoidArray * workList, nsIWebShell * ww); - -int main(int argc, char **argv) -{ - nsresult rv = NS_InitXPCOM2(nsnull, nsnull, nsnull); - if (NS_FAILED(rv)) { - printf("NS_InitXPCOM2 failed\n"); - return 1; - } - - nsVoidArray * gWorkList = new nsVoidArray(); - if(gWorkList) { - int i; - for (i = 1; i < argc; ++i) { - nsString *tempString = new nsString; - tempString->AssignWithConversion(argv[i]); - gWorkList->AppendElement(tempString); - } - } - - return DebugRobot(gWorkList, nsnull); -} - diff --git a/htmlparser/src/.cvsignore b/htmlparser/src/.cvsignore deleted file mode 100644 index f3c7a7c5da68..000000000000 --- a/htmlparser/src/.cvsignore +++ /dev/null @@ -1 +0,0 @@ -Makefile diff --git a/htmlparser/src/CNavDTD.cpp b/htmlparser/src/CNavDTD.cpp deleted file mode 100644 index 6076756640e8..000000000000 --- a/htmlparser/src/CNavDTD.cpp +++ /dev/null @@ -1,3923 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. 
- * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -//#define ENABLE_CRC -//#define ALLOW_TR_AS_CHILD_OF_TABLE //by setting this to true, TR is allowable directly in TABLE. - -#define ENABLE_RESIDUALSTYLE - - -#include "nsDebug.h" -#include "nsIAtom.h" -#include "CNavDTD.h" -#include "nsHTMLTokens.h" -#include "nsCRT.h" -#include "nsParser.h" -#include "nsIParser.h" -#include "nsIHTMLContentSink.h" -#include "nsScanner.h" -#include "prenv.h" //this is here for debug reasons... -#include "prtypes.h" //this is here for debug reasons... 
-#include "prio.h" -#include "plstr.h" -#include "nsDTDUtils.h" -#include "nsHTMLTokenizer.h" -#include "nsTime.h" -#include "nsParserNode.h" -#include "nsHTMLEntities.h" -#include "nsLinebreakConverter.h" -#include "nsIFormProcessor.h" -#include "nsVoidArray.h" -#include "nsReadableUtils.h" -#include "nsUnicharUtils.h" -#include "prmem.h" -#include "nsIServiceManager.h" - -#ifdef NS_DEBUG -#include "nsLoggingSink.h" -#endif - - -static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); -static NS_DEFINE_IID(kIDTDIID, NS_IDTD_IID); -static NS_DEFINE_IID(kClassIID, NS_INAVHTML_DTD_IID); - -static NS_DEFINE_CID(kFormProcessorCID, NS_FORMPROCESSOR_CID); - -#ifdef DEBUG -static const char kNullToken[] = "Error: Null token given"; -static const char kInvalidTagStackPos[] = "Error: invalid tag stack position"; -#endif - -#ifdef ENABLE_CRC -static char gShowCRC; -#endif - -#include "nsElementTable.h" - - -#ifdef MOZ_PERF_METRICS -# define START_TIMER() \ - if(mParser) MOZ_TIMER_START(mParser->mParseTime); \ - if(mParser) MOZ_TIMER_START(mParser->mDTDTime); - -# define STOP_TIMER() \ - if(mParser) MOZ_TIMER_STOP(mParser->mParseTime); \ - if(mParser) MOZ_TIMER_STOP(mParser->mDTDTime); -#else -# define STOP_TIMER() -# define START_TIMER() -#endif - -/************************************************************************ - And now for the main class -- CNavDTD... 
- ************************************************************************/ - - -#define NS_DTD_FLAG_NONE 0x00000000 -#define NS_DTD_FLAG_HAS_OPEN_HEAD 0x00000001 -#define NS_DTD_FLAG_HAS_OPEN_BODY 0x00000002 -#define NS_DTD_FLAG_HAS_OPEN_FORM 0x00000004 -#define NS_DTD_FLAG_HAS_OPEN_SCRIPT 0x00000008 -#define NS_DTD_FLAG_HAD_BODY 0x00000010 -#define NS_DTD_FLAG_HAD_FRAMESET 0x00000020 -#define NS_DTD_FLAG_ENABLE_RESIDUAL_STYLE 0x00000040 -#define NS_DTD_FLAG_SCRIPT_ENABLED 0x00000100 -#define NS_DTD_FLAG_FRAMES_ENABLED 0x00000200 -#define NS_DTD_FLAG_ALTERNATE_CONTENT 0x00000400 // NOFRAMES, NOSCRIPT -#define NS_DTD_FLAG_MISPLACED_CONTENT 0x00000800 -#define NS_DTD_FLAG_STOP_PARSING 0x00001000 - -/** - * This method gets called as part of our COM-like interfaces. - * Its purpose is to create an interface to parser object - * of some type. - * - * @update gess 4/8/98 - * @param nsIID id of object to discover - * @param aInstancePtr ptr to newly discovered interface - * @return NS_xxx result code - */ -nsresult CNavDTD::QueryInterface(const nsIID& aIID, void** aInstancePtr) -{ - if (NULL == aInstancePtr) { - return NS_ERROR_NULL_POINTER; - } - - if(aIID.Equals(kISupportsIID)) { //do IUnknown... - *aInstancePtr = (nsIDTD*)(this); - } - else if(aIID.Equals(kIDTDIID)) { //do IParser base class... - *aInstancePtr = (nsIDTD*)(this); - } - else if(aIID.Equals(kClassIID)) { //do this class... - *aInstancePtr = (CNavDTD*)(this); - } - else { - *aInstancePtr=0; - return NS_NOINTERFACE; - } - NS_ADDREF_THIS(); - return NS_OK; -} - -NS_IMPL_ADDREF(CNavDTD) -NS_IMPL_RELEASE(CNavDTD) - -/** - * Default constructor - * - * @update gess 4/9/98 - * @param - * @return - */ -CNavDTD::CNavDTD() : nsIDTD(), - mMisplacedContent(0), - mSkippedContent(0), - mSink(0), - mTokenAllocator(0), - mTempContext(0), - mParser(0), - mTokenizer(0), - mDTDMode(eDTDMode_quirks), - mDocType(eHTML3_Quirks), // why not eHTML_Quirks? 
- mParserCommand(eViewNormal), - mSkipTarget(eHTMLTag_unknown), - mLineNumber(1), - mOpenMapCount(0), - mFlags(NS_DTD_FLAG_NONE) -#ifdef ENABLE_CRC - ,mComputedCRC32(0), - mExpectedCRC32(0) -#endif -{ - mBodyContext=new nsDTDContext(); -} - -/** - * - * @update gess1/8/99 - * @param - * @return - */ -const nsIID& CNavDTD::GetMostDerivedIID(void)const { - return kClassIID; -} - - -#ifdef NS_DEBUG - -nsLoggingSink* GetLoggingSink() { - - //these are used when you want to generate a log file for contentsink construction... - - static PRBool checkForPath=PR_TRUE; - static nsLoggingSink *theSink=0; - static const char* gLogPath=0; - - if(checkForPath) { - - // we're only going to check the environment once per session. - - gLogPath = /* "c:/temp/parse.log"; */ PR_GetEnv("PARSE_LOGFILE"); - checkForPath=PR_FALSE; - } - - - if(gLogPath && (!theSink)) { - static nsLoggingSink gLoggingSink; - - PRIntn theFlags = 0; - - // create the file exists, only open for read/write - // otherwise, create it - if(PR_Access(gLogPath,PR_ACCESS_EXISTS) != PR_SUCCESS) - theFlags = PR_CREATE_FILE; - theFlags |= PR_RDWR; - - // open the record file - PRFileDesc *theLogFile = PR_Open(gLogPath,theFlags,0); - gLoggingSink.SetOutputStream(theLogFile,PR_TRUE); - theSink=&gLoggingSink; - } - - return theSink; -} - -#endif - -/** - * Default destructor - * - * @update gess 4/9/98 - * @param - * @return - */ -CNavDTD::~CNavDTD(){ - if(mBodyContext) { - delete mBodyContext; - mBodyContext=0; - } - - if(mTempContext) { - delete mTempContext; - mTempContext=0; - } - - -#ifdef NS_DEBUG - if(mSink) { - nsLoggingSink *theLogSink=GetLoggingSink(); - if(mSink==theLogSink) { - theLogSink->ReleaseProxySink(); - } - } -#endif - - NS_IF_RELEASE(mSink); -} - - -/** - * Call this method if you want the DTD to construct a fresh - * instance of itself. 
- * @update gess 25May2000 - * @param - * @return - */ -nsresult CNavDTD::CreateNewInstance(nsIDTD** aInstancePtrResult) -{ - nsresult result = NS_NewNavHTMLDTD(aInstancePtrResult); - NS_ENSURE_SUCCESS(result, result); - - CNavDTD* dtd = NS_STATIC_CAST(CNavDTD*, *aInstancePtrResult); - - dtd->mDTDMode = mDTDMode; - dtd->mParserCommand = mParserCommand; - dtd->mDocType = mDocType; - - return result; -} - -/** - * This method is called to determine if the given DTD can parse - * a document in a given source-type. - * NOTE: Parsing always assumes that the end result will involve - * storing the result in the main content model. - * @update gess 02/24/00 - * @param - * @return TRUE if this DTD can satisfy the request; FALSE otherwise. - */ -NS_IMETHODIMP_(eAutoDetectResult) -CNavDTD::CanParse(CParserContext& aParserContext, - const nsString& aBuffer, PRInt32 aVersion) -{ - eAutoDetectResult result=eUnknownDetect; - - if(aParserContext.mParserCommand != eViewSource) { - if(PR_TRUE==aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kHTMLTextContentType))) { - result=ePrimaryDetect; - } - else if(PR_TRUE==aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kPlainTextContentType))) { - result=ePrimaryDetect; - } - else if(PR_TRUE==aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kTextCSSContentType))) { - result=ePrimaryDetect; - } - else if(PR_TRUE==aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kApplicationJSContentType))) { - result=ePrimaryDetect; - } - else if(PR_TRUE==aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kTextJSContentType))) { - result=ePrimaryDetect; - } - // do this for XML-based content-types so that we don't fall back - // to BufferContainsHTML() for known content types - // see bug 132681 - // this will be cleaned up after moz 1.0 -alecf - else if (aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kRDFTextContentType)) || - aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kXULTextContentType)) || - 
aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kXMLTextContentType)) || -#ifdef MOZ_SVG - aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kSVGTextContentType)) || -#endif - aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kXMLApplicationContentType))) { - result=eUnknownDetect; - } - else { - //otherwise, look into the buffer to see if you recognize anything... - PRBool theBufHasXML=PR_FALSE; - if(BufferContainsHTML(aBuffer,theBufHasXML)){ - result = eValidDetect ; - if(0==aParserContext.mMimeType.Length()) { - aParserContext.SetMimeType(NS_LITERAL_CSTRING(kHTMLTextContentType)); - if(!theBufHasXML) { - switch(aParserContext.mDTDMode) { - case eDTDMode_full_standards: - case eDTDMode_almost_standards: - result=eValidDetect; - break; - default: - result=ePrimaryDetect; - break; - } - } - else result=eValidDetect; - } - } - } - } - return result; -} - -/** - * The parser uses a code sandwich to wrap the parsing process. Before - * the process begins, WillBuildModel() is called. Afterwards the parser - * calls DidBuildModel(). - * @update rickg 03.20.2000 - * @param aParserContext - * @param aSink - * @return error code (almost always 0) - */ -nsresult CNavDTD::WillBuildModel(const CParserContext& aParserContext, - nsITokenizer* aTokenizer, - nsIContentSink* aSink) { - nsresult result=NS_OK; - - mFilename=aParserContext.mScanner->GetFilename(); - mFlags = NS_DTD_FLAG_ENABLE_RESIDUAL_STYLE; // residual style is always on. 
This will also reset the flags - mLineNumber = 1; - mDTDMode = aParserContext.mDTDMode; - mParserCommand = aParserContext.mParserCommand; - mMimeType = aParserContext.mMimeType; - mDocType = aParserContext.mDocType; - mSkipTarget = eHTMLTag_unknown; - mTokenizer = aTokenizer; - mBodyContext->SetNodeAllocator(&mNodeAllocator); - - if(!aParserContext.mPrevContext && aSink) { - -#ifdef DEBUG - mBodyContext->ResetCounters(); -#endif - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::WillBuildModel(), this=%p\n", this)); - - result = aSink->WillBuildModel(); - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::WillBuildModel(), this=%p\n", this)); - START_TIMER(); - - if (NS_SUCCEEDED(result) && !mSink) { - result = CallQueryInterface(aSink, &mSink); - if (NS_FAILED(result)) { - mFlags |= NS_DTD_FLAG_STOP_PARSING; - return result; - } - } - - //let's see if the environment is set up for us to write output to - //a logging sink. If so, then we'll create one, and make it the - //proxy for the real sink we're given from the parser. -#ifdef NS_DEBUG - nsLoggingSink *theLogSink=GetLoggingSink(); - if(theLogSink) { - theLogSink->SetProxySink(mSink); - mSink=theLogSink; - } -#endif - - if(mSink) { - PRBool enabled; - mSink->IsEnabled(eHTMLTag_frameset, &enabled); - if(enabled) { - mFlags |= NS_DTD_FLAG_FRAMES_ENABLED; - } - - mSink->IsEnabled(eHTMLTag_script, &enabled); - if(enabled) { - mFlags |= NS_DTD_FLAG_SCRIPT_ENABLED; - } - } - -#ifdef ENABLE_CRC - mComputedCRC32=0; - mExpectedCRC32=0; -#endif - } - - return result; -} - - -/** - * This is called when it's time to read as many tokens from the tokenizer - * as you can. Not all tokens may make sense, so you may not be able to - * read them all (until more come in later). 
- * - * @update gess5/18/98 - * @param aParser is the parser object that's driving this process - * @return error code (almost always NS_OK) - */ -nsresult CNavDTD::BuildModel(nsIParser* aParser,nsITokenizer* aTokenizer,nsITokenObserver* anObserver,nsIContentSink* aSink) { - NS_PRECONDITION(mBodyContext!=nsnull,"Create a context before calling build model"); - - nsresult result = NS_OK; - - if (aTokenizer && aParser) { - nsITokenizer* oldTokenizer = mTokenizer; - - mTokenizer = aTokenizer; - mParser = (nsParser*)aParser; - mTokenAllocator = mTokenizer->GetTokenAllocator(); - - if (mSink) { - if (mBodyContext->GetCount() == 0) { - CStartToken* theToken=nsnull; - if(ePlainText==mDocType) { - //we do this little trick for text files, in both normal and viewsource mode... - theToken=NS_STATIC_CAST(CStartToken*,mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_pre)); - if(theToken) { - mTokenizer->PushTokenFront(theToken); - } - } - - // always open a body if frames are disabled.... - if(!(mFlags & NS_DTD_FLAG_FRAMES_ENABLED)) { - theToken=NS_STATIC_CAST(CStartToken*,mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_body,NS_LITERAL_STRING("body"))); - mTokenizer->PushTokenFront(theToken); - } - //if the content model is empty, then begin by opening ... - theToken = (CStartToken*)mTokenizer->GetTokenAt(0); - if (theToken) { - eHTMLTags theTag = (eHTMLTags)theToken->GetTypeID(); - eHTMLTokenTypes theType = eHTMLTokenTypes(theToken->GetTokenType()); - if (theTag != eHTMLTag_html || theType != eToken_start) { - theToken = NS_STATIC_CAST(CStartToken*,mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_html,NS_LITERAL_STRING("html"))); - if (theToken) { - mTokenizer->PushTokenFront(theToken); //this token should get pushed on the context stack. 
- } - } - } - else { - theToken = NS_STATIC_CAST(CStartToken*,mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_html,NS_LITERAL_STRING("html"))); - if (theToken) { - mTokenizer->PushTokenFront(theToken); //this token should get pushed on the context stack. - } - } - } - - mSink->WillProcessTokens(); - - while (NS_SUCCEEDED(result)) { - if (!(mFlags & NS_DTD_FLAG_STOP_PARSING)) { - CToken* theToken = mTokenizer->PopToken(); - if (theToken) { - result = HandleToken(theToken,aParser); - } - else break; - } - else { - result = NS_ERROR_HTMLPARSER_STOPPARSING; - break; - } - - if ((NS_ERROR_HTMLPARSER_INTERRUPTED == mSink->DidProcessAToken())) { - // The content sink has requested that DTD interrupt processing tokens - // So we need to make sure the parser is in a state where it can be - // interrupted. - // The mParser->CanInterrupt will return TRUE if BuildModel was called - // from a place in the parser where it prepared to handle a return value of - // NS_ERROR_HTMLPARSER_INTERRUPTED. - // If the parser has mPrevContext then it may be processing - // Script so we should not allow it to be interrupted. - - if ((mParser->CanInterrupt()) && - (nsnull == mParser->PeekContext()->mPrevContext) && - (eHTMLTag_unknown==mSkipTarget)) { - result = NS_ERROR_HTMLPARSER_INTERRUPTED; - break; - } - } - }//while - mTokenizer = oldTokenizer; - } - else { - result = mFlags & NS_DTD_FLAG_STOP_PARSING ? NS_ERROR_HTMLPARSER_STOPPARSING : result; - } - } - - return result; -} - -/** - * @param aTarget - Tag that was neglected in the document. - * @param aType - Specifies the type of the target. Ex. start, end, text, etc. - * @param aParser - Parser to drive this process - * @param aSink - HTML Content sink - */ -nsresult -CNavDTD::BuildNeglectedTarget(eHTMLTags aTarget, - eHTMLTokenTypes aType, - nsIParser* aParser, - nsIContentSink* aSink) -{ - NS_ASSERTION(mTokenizer, "tokenizer is null! 
unable to build target."); - NS_ASSERTION(mTokenAllocator, "unable to create tokens without an allocator."); - if (!mTokenizer || !mTokenAllocator) - return NS_OK; - if (eHTMLTag_unknown != mSkipTarget && eHTMLTag_title == aTarget) { - PRInt32 size = mSkippedContent.GetSize(); - // Note: The first location of the skipped content - // deque contains the opened-skip-target. Do not include - // that when guessing title contents. The term "guessing" - // is used because the document did not contain an end title - // and hence it's almost impossible to know what markup - // should belong in the title. The assumption used here is that - // if the markup is anything other than "text", or "entity" or, - // "whitespace" then it's least likely to belong in the title. - PRInt32 index; - for (index = 1; index < size; index++) { - CHTMLToken* token = - NS_REINTERPRET_CAST(CHTMLToken*, mSkippedContent.ObjectAt(index)); - NS_ASSERTION(token, "there is a null token in the skipped content list!"); - eHTMLTokenTypes type = eHTMLTokenTypes(token->GetTokenType()); - if (eToken_whitespace != type && - eToken_newline != type && - eToken_text != type && - eToken_entity != type && - eToken_attribute != type) { - // Now pop the tokens that do not belong ( just a guess work ) - // in the title and push them into the tokens queue. 
- while (size != index++) { - token = NS_REINTERPRET_CAST(CHTMLToken*, mSkippedContent.Pop()); - mTokenizer->PushTokenFront(token); - } - break; - } - } - } - CHTMLToken* target = - NS_STATIC_CAST(CHTMLToken*, mTokenAllocator->CreateTokenOfType(aType, aTarget)); - mTokenizer->PushTokenFront(target); - return BuildModel(aParser, mTokenizer, 0, aSink); -} - -/** - * - * @update gess5/18/98 - * @param - * @return - */ -nsresult CNavDTD::DidBuildModel(nsresult anErrorCode, - PRBool aNotifySink, - nsIParser* aParser, - nsIContentSink* aSink) -{ - if (!aSink) - return NS_OK; - nsresult result = NS_OK; - if (aParser && aNotifySink) { - if (NS_OK == anErrorCode) { - if (eHTMLTag_unknown != mSkipTarget) { - // Looks like there is an open target ( ex. , <textarea> ). - // Create a matching target to handle the unclosed target. - result = BuildNeglectedTarget(mSkipTarget, eToken_end, aParser, aSink); - NS_ENSURE_SUCCESS(result , result); - } - if (!(mFlags & (NS_DTD_FLAG_HAD_FRAMESET | NS_DTD_FLAG_HAD_BODY))) { - // This document is not a frameset document, however, it did not contain - // a body tag either. So, make one!. Note: Body tag is optional per spec.. - result = BuildNeglectedTarget(eHTMLTag_body, eToken_start, aParser, aSink); - NS_ENSURE_SUCCESS(result , result); - } - if (mFlags & NS_DTD_FLAG_MISPLACED_CONTENT) { - // Looks like the misplaced contents are not processed yet. - // Here is our last chance to handle the misplaced content. - mFlags &= ~NS_DTD_FLAG_MISPLACED_CONTENT; - - // mContextTopIndex refers to the misplaced content's legal parent index. - result = HandleSavedTokens(mBodyContext->mContextTopIndex); - NS_ENSURE_SUCCESS(result, result); - - mBodyContext->mContextTopIndex = -1; - } - //now let's disable style handling to save time when closing remaining stack members... 
- mFlags &= ~NS_DTD_FLAG_ENABLE_RESIDUAL_STYLE; - while (mBodyContext->GetCount() > 0) { - result = CloseContainersTo(mBodyContext->Last(), PR_FALSE); - if (NS_FAILED(result)) { - //No matter what, you need to call did build model. - aSink->DidBuildModel(); - return result; - } - } - } - else { - //If you're here, then an error occured, but we still have nodes on the stack. - //At a minimum, we should grab the nodes and recycle them. - //Just to be correct, we'll also recycle the nodes. - while (mBodyContext->GetCount() > 0) { - nsEntryStack* theChildStyles = 0; - nsCParserNode* theNode = mBodyContext->Pop(theChildStyles); - IF_DELETE(theChildStyles,&mNodeAllocator); - IF_FREE(theNode, &mNodeAllocator); - } - } - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::DidBuildModel(), this=%p\n", this)); - -#ifdef ENABLE_CRC - - //let's only grab this state once! - if (!gShowCRC) { - gShowCRC=1; //this only indicates we'll not initialize again. - char* theEnvString = PR_GetEnv("RICKG_CRC"); - if (theEnvString){ - if (('1'== theEnvString[0]) || ('Y'== theEnvString[0]) || ('y'== theEnvString[0])){ - gShowCRC=2; //this indicates that the CRC flag was found in the environment. - } - } - } - - if (2 == gShowCRC) { - if (mComputedCRC32 != mExpectedCRC32) { - if (mExpectedCRC32 != 0) { - printf("CRC Computed: %u Expected CRC: %u\n,",mComputedCRC32,mExpectedCRC32); - result = aSink->DidBuildModel(); - } - else { - printf("Computed CRC: %u.\n",mComputedCRC32); - result = aSink->DidBuildModel(); - NS_ENSURE_SUCCESS(result, result); - } - } - } -#endif - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::DidBuildModel(), this=%p\n", this)); - START_TIMER(); - - //Now make sure the misplaced content list is empty, - //by forcefully recycling any tokens we might find there. 
- - CToken* theToken = 0; - while ((theToken = (CToken*)mMisplacedContent.Pop())) { - IF_FREE(theToken, mTokenAllocator); - } - } //if aparser - - //No matter what, you need to call did build model. - return aSink->DidBuildModel(); -} - -NS_IMETHODIMP_(void) -CNavDTD::Terminate() -{ - mFlags |= NS_DTD_FLAG_STOP_PARSING; -} - - -NS_IMETHODIMP_(PRInt32) -CNavDTD::GetType() -{ - return NS_IPARSER_FLAG_HTML; -} - -/** - * --- Backwards compatibility --- - * Use this method to determine if the tag in question needs a BODY. - * --- - */ -static -PRBool DoesRequireBody(CToken* aToken,nsITokenizer* aTokenizer) { - - PRBool result=PR_FALSE; - - if(aToken) { - eHTMLTags theTag=(eHTMLTags)aToken->GetTypeID(); - if(gHTMLElements[theTag].HasSpecialProperty(kRequiresBody)) { - if(theTag==eHTMLTag_input) { - // IE & Nav4x opens up a body for type=text - Bug 66985 - PRInt32 ac=aToken->GetAttributeCount(); - for(PRInt32 i=0; i<ac; ++i) { - CAttributeToken* attr=NS_STATIC_CAST(CAttributeToken*,aTokenizer->GetTokenAt(i)); - const nsAString& name=attr->GetKey(); - const nsAString& value=attr->GetValue(); - - if((name.Equals(NS_LITERAL_STRING("type")) || - name.Equals(NS_LITERAL_STRING("TYPE"))) - && - !(value.Equals(NS_LITERAL_STRING("hidden")) || - value.Equals(NS_LITERAL_STRING("HIDDEN")))) { - result=PR_TRUE; - break; - } - }//for - } - else { - result=PR_TRUE; - } - } - } - - return result; -} - -static void -InPlaceConvertLineEndings( nsAString& aString ) -{ - // go from '\r\n' or '\r' to '\n' - nsAString::iterator iter; - aString.BeginWriting(iter); - - PRUnichar* S = iter.get(); - size_t N = iter.size_forward(); - - // this fragment must be the entire string because - // (a) no multi-fragment string is writable, so only an illegal cast could give us one, and - // (b) else we would have to do more work (watching for |to| to fall off the end) - NS_ASSERTION(aString.Length() == N, "You cheated... 
multi-fragment strings are never writable!"); - - // we scan/convert in two phases (but only one pass over the string) - // until we have to skip a character, we only need to touch end-of-line chars - // after that, we'll have to start moving every character we want to keep - - // use array indexing instead of pointers, because compilers optimize that better - - - // this first loop just converts line endings... no characters get moved - size_t i = 0; - PRBool just_saw_cr = PR_FALSE; - for ( ; i < N; ++i ) - { - // if it's something we need to convert... - if ( S[i] == '\r' ) - { - S[i] = '\n'; - just_saw_cr = PR_TRUE; - } - else - { - // else, if it's something we need to skip... - // i.e., a '\n' immediately following a '\r', - // then we need to start moving any character we want to keep - // and we have a second loop for that, so get out of this one - if ( S[i] == '\n' && just_saw_cr ) - break; - - just_saw_cr = PR_FALSE; - } - } - - - // this second loop handles the rest of the buffer, moving characters down - // _and_ converting line-endings as it goes - // start the loop at |from = i| so that that |just_saw_cr| gets cleared automatically - size_t to = i; - for ( size_t from = i; from < N; ++from ) - { - // if it's something we need to convert... - if ( S[from] == '\r' ) - { - S[to++] = '\n'; - just_saw_cr = PR_TRUE; - } - else - { - // else, if it's something we need to copy... - // i.e., NOT a '\n' immediately following a '\r' - if ( S[from] != '\n' || !just_saw_cr ) - S[to++] = S[from]; - - just_saw_cr = PR_FALSE; - } - } - - // if we chopped characters out of the string, we need to shorten it logically - if ( to < N ) - aString.SetLength(to); -} - -/** - * This big dispatch method is used to route token handler calls to the right place. - * What's wrong with it? This table, and the dispatch methods themselves need to be - * moved over to the delegate. Ah, so much to do... 
- * - * @update gess 12/1/99 - * @param aToken - * @param aParser - * @return - */ -nsresult CNavDTD::HandleToken(CToken* aToken,nsIParser* aParser){ - nsresult result=NS_OK; - - if(aToken) { - CHTMLToken* theToken= NS_STATIC_CAST(CHTMLToken*, aToken); - eHTMLTokenTypes theType=eHTMLTokenTypes(theToken->GetTokenType()); - eHTMLTags theTag=(eHTMLTags)theToken->GetTypeID(); - PRBool execSkipContent=PR_FALSE; - - aToken->SetLineNumber(mLineNumber); - - mLineNumber += aToken->GetNewlineCount(); - - /* --------------------------------------------------------------------------------- - To understand this little piece of code, you need to look below too. - In essence, this code caches "skipped content" until we find a given skiptarget. - Once we find the skiptarget, we take all skipped content up to that point and - coallate it. Then we push those tokens back onto the tokenizer deque. - --------------------------------------------------------------------------------- - */ - - // printf("token: %p\n",aToken); - - if(mSkipTarget){ //handle a preexisting target... - if((theTag==mSkipTarget) && (eToken_end==theType)){ - mSkipTarget=eHTMLTag_unknown; //stop skipping. - //mTokenizer->PushTokenFront(aToken); //push the end token... - execSkipContent=PR_TRUE; - IF_FREE(aToken, mTokenAllocator); - theToken=(CHTMLToken*)mSkippedContent.PopFront(); - theType=eToken_start; - } - else { - mSkippedContent.Push(theToken); - return result; - } - } - else if(mFlags & NS_DTD_FLAG_ALTERNATE_CONTENT) { - if(theTag != mBodyContext->Last() || theType!=eToken_end) { - // attribute source is a part of start token. - if(theType!=eToken_attribute) { - aToken->AppendSourceTo(mScratch); - } - IF_FREE(aToken, mTokenAllocator); - return result; - } - else { - // If you're here then we have either seen a /noscript, - // or /noframes, or /iframe. After handling the text token - // intentionally fall thro' to handle the current end token. 
- CTextToken theTextToken(mScratch); - result=HandleStartToken(&theTextToken); - - if(NS_FAILED(result)) { - return result; - } - - mScratch.Truncate(); - mScratch.SetCapacity(0); - } - } - else if(mFlags & NS_DTD_FLAG_MISPLACED_CONTENT) { - // Included TD & TH to fix Bug# 20797 - static eHTMLTags gLegalElements[]={eHTMLTag_table,eHTMLTag_thead,eHTMLTag_tbody, - eHTMLTag_tr,eHTMLTag_td,eHTMLTag_th,eHTMLTag_tfoot}; - if(theToken) { - eHTMLTags theParentTag=mBodyContext->Last(); - theTag=(eHTMLTags)theToken->GetTypeID(); - if((FindTagInSet(theTag,gLegalElements,sizeof(gLegalElements)/sizeof(theTag))) || - (gHTMLElements[theParentTag].CanContain(theTag,mDTDMode)) && (theTag!=eHTMLTag_comment)) { // Added comment -> bug 40855 - - mFlags &= ~NS_DTD_FLAG_MISPLACED_CONTENT; // reset the state since all the misplaced tokens are about to get handled. - - result = HandleSavedTokens(mBodyContext->mContextTopIndex); - NS_ENSURE_SUCCESS(result, result); - - mBodyContext->mContextTopIndex = -1; - - if (mSkipTarget) { - mSkippedContent.Push(theToken); - return result; - } - // Fall through if the skipped content collection is |not| in progress - bug 124788 - } - else { - PushIntoMisplacedStack(theToken); - return result; - } - } - } - - - /* --------------------------------------------------------------------------------- - This section of code is used to "move" misplaced content from one location in - our document model to another. (Consider what would happen if we found a <P> tag - and text in the head.) To move content, we throw it onto the misplacedcontent - deque until we can deal with it. 
- --------------------------------------------------------------------------------- - */ - if(!execSkipContent) { - - switch(theTag) { - case eHTMLTag_html: - case eHTMLTag_noframes: - case eHTMLTag_noscript: - case eHTMLTag_script: - case eHTMLTag_doctypeDecl: - case eHTMLTag_instruction: - break; - case eHTMLTag_comment: - case eHTMLTag_newline: - case eHTMLTag_whitespace: - case eHTMLTag_userdefined: - if (mMisplacedContent.GetSize() == 0) { - // simply pass these through to token handler without further ado... - // fix for bugs 17017,18308,23765,24275,69331 - break; - } - default: - if(!gHTMLElements[eHTMLTag_html].SectionContains(theTag,PR_FALSE)) { - if(!(mFlags & (NS_DTD_FLAG_HAD_BODY | NS_DTD_FLAG_HAD_FRAMESET))) { - - //For bug examples from this code, see bugs: 18928, 20989. - - //At this point we know the body/frameset aren't open. - //If the child belongs in the head, then handle it (which may open the head); - //otherwise, push it onto the misplaced stack. - - PRBool theExclusive=PR_FALSE; - PRBool theChildBelongsInHead=gHTMLElements[eHTMLTag_head].IsChildOfHead(theTag,theExclusive); - if(!theChildBelongsInHead) { - - //If you're here then we found a child of the body that was out of place. - //We're going to move it to the body by storing it temporarily on the misplaced stack. - //However, in quirks mode, a few tags request, ambiguosly, for a BODY. 
- Bugs 18928, 24204.- - PushIntoMisplacedStack(aToken); - if(DoesRequireBody(aToken,mTokenizer)) { - CToken* theBodyToken=NS_STATIC_CAST(CToken*,mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_body,NS_LITERAL_STRING("body"))); - result=HandleToken(theBodyToken,aParser); - } - return result; - } - } //if - } //if - }//switch - - } //if - - if(theToken){ - //Before dealing with the token normally, we need to deal with skip targets - CStartToken* theStartToken=NS_STATIC_CAST(CStartToken*,aToken); - if((!execSkipContent) && - (theType!=eToken_end) && - (eHTMLTag_unknown==mSkipTarget) && - (gHTMLElements[theTag].mSkipTarget) && - (!theStartToken->IsEmpty())) { // added empty token check for bug 44186 - //create a new target - NS_ASSERTION(mSkippedContent.GetSize() == 0, "all the skipped content tokens did not get handled"); - mSkippedContent.Empty(); - mSkipTarget=gHTMLElements[theTag].mSkipTarget; - mSkippedContent.Push(theToken); - } - else { - - mParser=(nsParser*)aParser; - - switch(theType) { - case eToken_text: - case eToken_start: - case eToken_whitespace: - case eToken_newline: - result=HandleStartToken(theToken); break; - - case eToken_end: - result=HandleEndToken(theToken); break; - - case eToken_cdatasection: - case eToken_comment: - case eToken_markupDecl: - result=HandleCommentToken(theToken); break; - - case eToken_entity: - result=HandleEntityToken(theToken); break; - - case eToken_attribute: - result=HandleAttributeToken(theToken); break; - - case eToken_instruction: - result=HandleProcessingInstructionToken(theToken); break; - - case eToken_doctypeDecl: - result=HandleDocTypeDeclToken(theToken); break; - - default: - break; - }//switch - - - if(NS_SUCCEEDED(result) || (NS_ERROR_HTMLPARSER_BLOCK==result)) { - IF_FREE(theToken, mTokenAllocator); - } - else if(result==NS_ERROR_HTMLPARSER_STOPPARSING) { - mFlags |= NS_DTD_FLAG_STOP_PARSING; - } - else { - return NS_OK; - } - } - } - - }//if - return result; -} - -/** - * This gets called after 
we've handled a given start tag. - * It's a generic hook to let us to post processing. - * @param aToken contains the tag in question - * @param aChildTag is the tag itself. - * @return status - */ -nsresult CNavDTD::DidHandleStartTag(nsIParserNode& aNode,eHTMLTags aChildTag){ - nsresult result=NS_OK; - -#if 0 - // XXX --- Ignore this: it's just rickg debug testing... - nsAutoString theStr; - aNode.GetSource(theStr); -#endif - - switch(aChildTag){ - - case eHTMLTag_pre: - case eHTMLTag_listing: - { - CToken* theNextToken=mTokenizer->PeekToken(); - if(theNextToken) { - eHTMLTokenTypes theType=eHTMLTokenTypes(theNextToken->GetTokenType()); - if(eToken_newline==theType){ - mLineNumber += theNextToken->GetNewlineCount(); - theNextToken=mTokenizer->PopToken(); //skip 1st newline inside PRE and LISTING - IF_FREE(theNextToken, mTokenAllocator); // fix for Bug 29379 - }//if - }//if - } - break; - - case eHTMLTag_plaintext: - case eHTMLTag_xmp: - //grab the skipped content and dump it out as text... 
- { - STOP_TIMER() - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::DidHandleStartTag(), this=%p\n", this)); - nsAutoString theString; - PRInt32 lineNo = 0; - - result = CollectSkippedContent(aChildTag, theString, lineNo); - NS_ENSURE_SUCCESS(result, result); - - if(0<theString.Length()) { - CTextToken *theToken=NS_STATIC_CAST(CTextToken*,mTokenAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text,theString)); - nsCParserNode theNode(theToken, mTokenAllocator); - result=mSink->AddLeaf(theNode); //when the node get's destructed, so does the new token - } - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::DidHandleStartTag(), this=%p\n", this)); - START_TIMER() - } - break; - -#ifdef DEBUG - case eHTMLTag_counter: - { - PRInt32 theCount=mBodyContext->GetCount(); - eHTMLTags theGrandParentTag=mBodyContext->TagAt(theCount-1); - - nsAutoString theNumber; - - mBodyContext->IncrementCounter(theGrandParentTag,aNode,theNumber); - - CTextToken theToken(theNumber); - nsCParserNode theNode(&theToken, 0 /*stack token*/); - result=mSink->AddLeaf(theNode); - } - break; - - case eHTMLTag_meta: - { - //we should only enable user-defined entities in debug builds... - - PRInt32 theCount=aNode.GetAttributeCount(); - const nsAString* theNamePtr=0; - const nsAString* theValuePtr=0; - - if(theCount) { - PRInt32 theIndex=0; - for(theIndex=0;theIndex<theCount;++theIndex){ - const nsAString& theKey = aNode.GetKeyAt(theIndex); - if(theKey.Equals(NS_LITERAL_STRING("ENTITY"), nsCaseInsensitiveStringComparator())) { - const nsAString& theName=aNode.GetValueAt(theIndex); - theNamePtr=&theName; - } - else if(theKey.Equals(NS_LITERAL_STRING("VALUE"), nsCaseInsensitiveStringComparator())) { - //store the named enity with the context... 
- const nsAString& theValue=aNode.GetValueAt(theIndex); - theValuePtr=&theValue; - } - } - } - if(theNamePtr && theValuePtr) { - mBodyContext->RegisterEntity(*theNamePtr,*theValuePtr); - } - } - break; -#endif - - default: - break; - }//switch - - //handle <empty/> tags by generating a close tag... - //added this to fix bug 48351, which contains XHTML and uses empty tags. - nsCParserNode* theNode=NS_STATIC_CAST(nsCParserNode*,&aNode); - if(nsHTMLElement::IsContainer(aChildTag) && theNode && theNode->mToken) { //nullptr test fixes bug 56085 - CStartToken *theToken=NS_STATIC_CAST(CStartToken*,theNode->mToken); - if(theToken->IsEmpty()){ - - CToken *theEndToken=mTokenAllocator->CreateTokenOfType(eToken_end,aChildTag); - if(theEndToken) { - result=HandleEndToken(theEndToken); - IF_FREE(theEndToken, mTokenAllocator); - } - } - } - - return result; -} - -/** - * Determine whether the given tag is open anywhere - * in our context stack. - * - * @update gess 4/2/98 - * @param eHTMLTags tag to be searched for in stack - * @return topmost index of tag on stack - */ -PRInt32 CNavDTD::LastOf(eHTMLTags aTagSet[],PRInt32 aCount) const { - int theIndex=0; - for(theIndex=mBodyContext->GetCount()-1;theIndex>=0;theIndex--){ - if(FindTagInSet((*mBodyContext)[theIndex],aTagSet,aCount)) { - return theIndex; - } - } - return kNotFound; -} - -/** - * Call this to find the index of a given child, or (if not found) - * the index of its nearest synonym. 
- * - * @update gess 3/25/98 - * @param aTagStack -- list of open tags - * @param aTag -- tag to test for containership - * @return index of kNotFound - */ -static -PRInt32 GetIndexOfChildOrSynonym(nsDTDContext& aContext,eHTMLTags aChildTag) { - -#if 1 - PRInt32 theChildIndex=nsHTMLElement::GetIndexOfChildOrSynonym(aContext,aChildTag); -#else - PRInt32 theChildIndex=aContext.LastOf(aChildTag); - if(kNotFound==theChildIndex) { - TagList* theSynTags=gHTMLElements[aChildTag].GetSynonymousTags(); //get the list of tags that THIS tag can close - if(theSynTags) { - theChildIndex=LastOf(aContext,*theSynTags); - } - else{ - PRInt32 theGroup=nsHTMLElement::GetSynonymousGroups(aChildTag); - if(theGroup) { - theChildIndex=aContext.GetCount(); - while(-1<--theChildIndex) { - eHTMLTags theTag=aContext[theChildIndex]; - if(gHTMLElements[theTag].IsMemberOf(theGroup)) { - break; - } - } - } - } - } -#endif - return theChildIndex; -} - -/** - * This method is called to determine whether or not the child - * tag is happy being OPENED in the context of the current - * tag stack. This is only called if the current parent thinks - * it wants to contain the given childtag. - * - * @param aChildTag -- tag enum of child to be opened - * @param aTagStack -- ref to current tag stack in DTD. - * @return PR_TRUE if child agrees to be opened here. - */ -static -PRBool CanBeContained(eHTMLTags aChildTag,nsDTDContext& aContext) { - - /* # Interesting test cases: Result: - * 1. <UL><LI>..<B>..<LI> inner <LI> closes outer <LI> - * 2. <CENTER><DL><DT><A><CENTER> allow nested <CENTER> - * 3. <TABLE><TR><TD><TABLE>... allow nested <TABLE> - * 4. <FRAMESET> ... <FRAMESET> - */ - - //Note: This method is going away. First we need to get the elementtable to do closures right, and - // therefore we must get residual style handling to work. - - //the changes to this method were added to fix bug 54651... 
- - PRBool result=PR_TRUE; - PRInt32 theCount=aContext.GetCount(); - - if(0<theCount){ - const TagList* theRootTags=gHTMLElements[aChildTag].GetRootTags(); - const TagList* theSpecialParents=gHTMLElements[aChildTag].GetSpecialParents(); - if(theRootTags) { - PRInt32 theRootIndex=LastOf(aContext,*theRootTags); - PRInt32 theSPIndex=(theSpecialParents) ? LastOf(aContext,*theSpecialParents) : kNotFound; - PRInt32 theChildIndex=GetIndexOfChildOrSynonym(aContext,aChildTag); - PRInt32 theTargetIndex=(theRootIndex>theSPIndex) ? theRootIndex : theSPIndex; - - if((theTargetIndex==theCount-1) || - ((theTargetIndex==theChildIndex) && gHTMLElements[aChildTag].CanContainSelf())) { - result=PR_TRUE; - } - else { - - result=PR_FALSE; - - static eHTMLTags gTableElements[]={eHTMLTag_td,eHTMLTag_th}; - - PRInt32 theIndex=theCount-1; - while(theChildIndex<theIndex) { - eHTMLTags theParentTag=aContext.TagAt(theIndex--); - if (gHTMLElements[theParentTag].IsMemberOf(kBlockEntity) || - gHTMLElements[theParentTag].IsMemberOf(kHeading) || - gHTMLElements[theParentTag].IsMemberOf(kPreformatted) || - gHTMLElements[theParentTag].IsMemberOf(kFormControl) || //fixes bug 44479 - gHTMLElements[theParentTag].IsMemberOf(kList)) { - if(!HasOptionalEndTag(theParentTag)) { - result=PR_TRUE; - break; - } - } - else if(FindTagInSet(theParentTag,gTableElements,sizeof(gTableElements)/sizeof(eHTMLTag_unknown))){ - result=PR_TRUE; //added this to catch a case we missed; bug 57173. - break; - } - } - } - } - } - - return result; - -} - -enum eProcessRule {eNormalOp,eLetInlineContainBlock}; - -/** - * This method gets called when a start token has been - * encountered in the parse process. If the current container - * can contain this tag, then add it. Otherwise, you have - * two choices: 1) create an implicit container for this tag - * to be stored in - * 2) close the top container, and add this to - * whatever container ends up on top. 
- * - * @update gess 3/25/98 - * @param aToken -- next (start) token to be handled - * @param aNode -- CParserNode representing this start token - * @return PR_TRUE if all went well; PR_FALSE if error occured - */ -nsresult CNavDTD::HandleDefaultStartToken(CToken* aToken,eHTMLTags aChildTag,nsCParserNode *aNode) { - NS_PRECONDITION(0!=aToken,kNullToken); - - nsresult result=NS_OK; - PRBool theChildIsContainer=nsHTMLElement::IsContainer(aChildTag); - - // client of parser is spefically trying to parse a fragment that - // may lack required context. Suspend containment rules if so. - if (mParserCommand != eViewFragment) - { - PRBool theChildAgrees=PR_TRUE; - PRInt32 theIndex=mBodyContext->GetCount(); - PRBool theParentContains=-1; - - do { - - eHTMLTags theParentTag=mBodyContext->TagAt(--theIndex); - theParentContains=CanContain(theParentTag,aChildTag); //precompute containment, and pass it to CanOmit()... - - if(CanOmit(theParentTag,aChildTag,theParentContains)) { - result=HandleOmittedTag(aToken,aChildTag,theParentTag,aNode); - return result; - } - - eProcessRule theRule=eNormalOp; - - if (!theParentContains && - (IsBlockElement(aChildTag,theParentTag) && - IsInlineElement(theParentTag,theParentTag))) { //broaden this to fix <inline><block></block></inline> - if (eHTMLTag_li != aChildTag) { //remove test for table to fix 57554 - nsCParserNode* theParentNode = NS_STATIC_CAST(nsCParserNode*, mBodyContext->PeekNode()); - if (theParentNode && theParentNode->mToken->IsWellFormed()) { - theRule = eLetInlineContainBlock; - } - } - } - - switch(theRule){ - - case eNormalOp: - - theChildAgrees=PR_TRUE; - if(theParentContains) { - - eHTMLTags theAncestor=gHTMLElements[aChildTag].mRequiredAncestor; - if(eHTMLTag_unknown!=theAncestor){ - theChildAgrees=HasOpenContainer(theAncestor); - } - - if(theChildAgrees && theChildIsContainer) { - if(theParentTag!=aChildTag) { - // Double check the power structure a - // Note: The bit is currently set on <A> and <LI>. 
- if(gHTMLElements[aChildTag].ShouldVerifyHierarchy()){ - PRInt32 theChildIndex=GetIndexOfChildOrSynonym(*mBodyContext,aChildTag); - - if((kNotFound<theChildIndex) && (theChildIndex<theIndex)) { - - /*------------------------------------------------------------------------------------- - 1 Here's a tricky case from bug 22596: <h5><li><h5> - How do we know that the 2nd <h5> should close the <LI> rather than nest inside the <LI>? - (Afterall, the <h5> is a legal child of the <LI>). - - The way you know is that there is no root between the two, so the <h5> binds more - tightly to the 1st <h5> than to the <LI>. - 2. Also, bug 6148 shows this case: <SPAN><DIV><SPAN> - From this case we learned not to execute this logic if the parent is a block. - - 3. Fix for 26583 - Ex. <A href=foo.html><B>foo<A href-bar.html>bar</A></B></A> <-- A legal HTML - In the above example clicking on "foo" or "bar" should link to - foo.html or bar.html respectively. That is, the inner <A> should be informed - about the presence of an open <A> above <B>..so that the inner <A> can close out - the outer <A>. The following code does it for us. - - 4. Fix for 27865 [ similer to 22596 ]. Ex: <DL><DD><LI>one<DD><LI>two - -------------------------------------------------------------------------------------*/ - - theChildAgrees=CanBeContained(aChildTag,*mBodyContext); - } //if - }//if - } //if - } //if - } //if parentcontains - - if(!(theParentContains && theChildAgrees)) { - if (!CanPropagate(theParentTag,aChildTag,theParentContains)) { - if(theChildIsContainer || (!theParentContains)){ - if(!theChildAgrees && !gHTMLElements[aChildTag].CanAutoCloseTag(*mBodyContext,aChildTag)) { - // Closing the tags above might cause non-compatible results. - // Ex. <TABLE><TR><TD><TBODY>Text</TD></TR></TABLE>. - // In the example above <TBODY> is badly misplaced, but - // we should not attempt to close the tags above it, - // The safest thing to do is to discard this tag. 
- return result; - } - else if (mBodyContext->mContextTopIndex > 0 && theIndex <= mBodyContext->mContextTopIndex) { - // Looks like the parent tag does not want to contain the current tag ( aChildTag ). - // However, we have to force the containment, when handling misplaced content, to avoid data loss. - // Ref. bug 138577. - theParentContains = PR_TRUE; - } - else { - CloseContainersTo(theIndex,aChildTag,PR_TRUE); - } - }//if - else break; - }//if - else { - CreateContextStackFor(aChildTag); - theIndex=mBodyContext->GetCount(); - } - }//if - break; - - case eLetInlineContainBlock: - theParentContains=theChildAgrees=PR_TRUE; //cause us to fall out of loop and open the block. - break; - - default: - break; - - }//switch - } while(!(theParentContains && theChildAgrees)); - } - - if(theChildIsContainer){ - result=OpenContainer(aNode,aChildTag,PR_TRUE); - } - else { //we're writing a leaf... - result=AddLeaf(aNode); - } - - return result; -} - -/** - * This gets called before we've handled a given start tag. - * It's a generic hook to let us do pre processing. - * @param aToken contains the tag in question - * @param aTag is the tag itself. - * @param aNode is the node (tag) with associated attributes. - * @return TRUE if tag processing should continue; FALSE if the tag has been handled. - */ -nsresult CNavDTD::WillHandleStartTag(CToken* aToken,eHTMLTags aTag,nsIParserNode& aNode) -{ - nsresult result = NS_OK; - - //this little gem creates a special attribute for the editor team to use. - //The attribute only get's applied to unknown tags, and is used by ender - //(during editing) to display a special icon for unknown tags. 
- if(eHTMLTag_userdefined == aTag) { - CAttributeToken* theToken= NS_STATIC_CAST(CAttributeToken*,mTokenAllocator->CreateTokenOfType(eToken_attribute,aTag)); - if(theToken) { - theToken->SetKey(NS_LITERAL_STRING("_moz-userdefined")); - aNode.AddAttribute(theToken); - } - } - - /************************************************************************************** - * - * Now a little code to deal with bug #49687 (crash when layout stack gets too deep) - * I've also opened this up to any container (not just inlines): re bug 55095 - * Improved to handle bug 55980 (infinite loop caused when DEPTH is exceeded and - * </P> is encountered by itself (<P>) is continuously produced. - * - **************************************************************************************/ - - PRInt32 stackDepth = mBodyContext->GetCount(); - if (stackDepth > MAX_REFLOW_DEPTH) { - if (nsHTMLElement::IsContainer(aTag) && - !gHTMLElements[aTag].HasSpecialProperty(kHandleStrayTag)) { - // Ref. bug 98261,49678,55095,55980 - // Instead of throwing away the current tag close it's parent - // such that the stack level does not go beyond the max_reflow_depth. - // This would allow leaf tags, that follow the current tag, to find - // the correct node. 
- while (stackDepth != MAX_REFLOW_DEPTH && NS_SUCCEEDED(result)) { - result = CloseContainersTo(mBodyContext->Last(),PR_FALSE); - --stackDepth; - } - } - } - - STOP_TIMER() - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::WillHandleStartTag(), this=%p\n", this)); - - if (aTag <= NS_HTML_TAG_MAX) { - result = mSink->NotifyTagObservers(&aNode); - } - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::WillHandleStartTag(), this=%p\n", this)); - START_TIMER() - - if(NS_SUCCEEDED(result)) { - -#ifdef ENABLE_CRC - - STOP_TIMER() - - if(eHTMLTag_meta==aTag) { - PRInt32 theCount=aNode.GetAttributeCount(); - if(1<theCount){ - - const nsAString& theKey = aNode.GetKeyAt(0); - if(theKey.Equals("NAME",IGNORE_CASE)) { - const nsString& theValue1=aNode.GetValueAt(0); - if(theValue1.Equals("\"CRC\"",IGNORE_CASE)) { - const nsAString& theKey2 = aNode.GetKeyAt(1); - if(theKey2.Equals("CONTENT",IGNORE_CASE)) { - const nsString& theValue2=aNode.GetValueAt(1); - PRInt32 err=0; - mExpectedCRC32=theValue2.ToInteger(&err); - } //if - } //if - } //else - - } //if - }//if - - START_TIMER() - -#endif - - if(NS_OK==result) { - result=gHTMLElements[aTag].HasSpecialProperty(kDiscardTag) ? 1 : NS_OK; - } - - //this code is here to make sure the head is closed before we deal - //with any tags that don't belong in the head. 
- if (NS_SUCCEEDED(result) && (mFlags & NS_DTD_FLAG_HAS_OPEN_HEAD && - eHTMLTag_newline != aTag && - eHTMLTag_whitespace != aTag && - eHTMLTag_userdefined != aTag)) { - PRBool theExclusive = PR_FALSE; - if (!gHTMLElements[eHTMLTag_head].IsChildOfHead(aTag, theExclusive)) { - result = CloseHead(); - } - } - } - return result; -} - -static void PushMisplacedAttributes(nsIParserNode& aNode,nsDeque& aDeque,PRInt32& aCount) { - if(aCount > 0) { - CToken* theAttrToken=nsnull; - nsCParserNode* theAttrNode = (nsCParserNode*)&aNode; - if(theAttrNode) { - while(aCount){ - theAttrToken=theAttrNode->PopAttributeToken(); - if(theAttrToken) { - theAttrToken->SetNewlineCount(0); - aDeque.Push(theAttrToken); - } - aCount--; - }//while - }//if - }//if -} - -/** - * This method gets called when a start token has been encountered that the parent - * wants to omit. - * - * @update gess 3/25/98 - * @param aToken -- next (start) token to be handled - * @param aChildTag -- id of the child in question - * @param aParent -- id of the parent in question - * @param aNode -- CParserNode representing this start token - * @return PR_TRUE if all went well; PR_FALSE if error occured - */ -nsresult CNavDTD::HandleOmittedTag(CToken* aToken,eHTMLTags aChildTag,eHTMLTags aParent,nsIParserNode* aNode) { - NS_PRECONDITION(mBodyContext != nsnull,"need a context to work with"); - - nsresult result=NS_OK; - - //The trick here is to see if the parent can contain the child, but prefers not to. - //Only if the parent CANNOT contain the child should we look to see if it's potentially a child - //of another section. If it is, the cache it for later. - // 1. Get the root node for the child. 
See if the ultimate node is the BODY, FRAMESET, HEAD or HTML - PRInt32 theTagCount = mBodyContext->GetCount(); - - if(aToken) { - PRInt32 attrCount = aToken->GetAttributeCount(); - if((gHTMLElements[aParent].HasSpecialProperty(kBadContentWatch)) && - (!nsHTMLElement::IsWhitespaceTag(aChildTag))) { - eHTMLTags theTag=eHTMLTag_unknown; - - // Determine the insertion point - while(theTagCount > 0) { - theTag = mBodyContext->TagAt(--theTagCount); - if(!gHTMLElements[theTag].HasSpecialProperty(kBadContentWatch)) { - mBodyContext->mContextTopIndex = theTagCount; // This is our insertion point - break; - } - } - - if(mBodyContext->mContextTopIndex>-1) { - - PushIntoMisplacedStack(aToken); - - IF_HOLD(aToken); // Hold on to this token for later use. - - // If the token is attributed then save those attributes too. - if(attrCount > 0) PushMisplacedAttributes(*aNode,mMisplacedContent,attrCount); - - if(gHTMLElements[aChildTag].mSkipTarget) { - nsAutoString theString; - PRInt32 lineNo = 0; - - result = CollectSkippedContent(aChildTag, theString, lineNo); - NS_ENSURE_SUCCESS(result, result); - - PushIntoMisplacedStack(mTokenAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text,theString)); - PushIntoMisplacedStack(mTokenAllocator->CreateTokenOfType(eToken_end,aChildTag)); - } - - mFlags |= NS_DTD_FLAG_MISPLACED_CONTENT; // This state would help us in gathering all the misplaced elements - }//if - }//if - - if((aChildTag!=aParent) && (gHTMLElements[aParent].HasSpecialProperty(kSaveMisplaced))) { - - IF_HOLD(aToken); // Hold on to this token for later use. Ref Bug. 53695 - - PushIntoMisplacedStack(aToken); - // If the token is attributed then save those attributes too. - if(attrCount > 0) PushMisplacedAttributes(*aNode,mMisplacedContent,attrCount); - } - } - return result; -} - -/** - * This method gets called when a kegen token is found. 
- * - * @update harishd 05/02/00 - * @param aNode -- CParserNode representing keygen - * @return NS_OK if all went well; ERROR if error occured - */ -nsresult CNavDTD::HandleKeyGen(nsIParserNode* aNode) { - nsresult result=NS_OK; - - if(aNode) { - - nsCOMPtr<nsIFormProcessor> theFormProcessor = - do_GetService(kFormProcessorCID, &result); - - if(NS_SUCCEEDED(result)) { - PRInt32 theAttrCount=aNode->GetAttributeCount(); - nsVoidArray theContent; - nsAutoString theAttribute; - nsAutoString theFormType; - CToken* theToken=nsnull; - - theFormType.Assign(NS_LITERAL_STRING("select")); - - result=theFormProcessor->ProvideContent(theFormType,theContent,theAttribute); - - if(NS_SUCCEEDED(result)) { - nsString* theTextValue=nsnull; - PRInt32 theIndex=nsnull; - - if(mTokenizer && mTokenAllocator) { - // Populate the tokenizer with the fabricated elements in the reverse order - // such that <SELECT> is on the top fo the tokenizer followed by <OPTION>s - // and </SELECT> - theToken=mTokenAllocator->CreateTokenOfType(eToken_end,eHTMLTag_select); - mTokenizer->PushTokenFront(theToken); - - for(theIndex=theContent.Count()-1;theIndex>-1;theIndex--) { - theTextValue=(nsString*)theContent[theIndex]; - theToken=mTokenAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text,*theTextValue); - mTokenizer->PushTokenFront(theToken); - theToken=mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_option); - mTokenizer->PushTokenFront(theToken); - } - - // The attribute ( provided by the form processor ) should be a part of the SELECT. - // Placing the attribute token on the tokenizer to get picked up by the SELECT. - theToken=mTokenAllocator->CreateTokenOfType(eToken_attribute,eHTMLTag_unknown,theAttribute); - - ((CAttributeToken*)theToken)->SetKey(NS_LITERAL_STRING("_moz-type")); - mTokenizer->PushTokenFront(theToken); - - // Pop out NAME and CHALLENGE attributes ( from the keygen NODE ) - // and place it in the tokenizer such that the attribtues get - // sucked into SELECT node. 
- for(theIndex=theAttrCount;theIndex>0;theIndex--) { - mTokenizer->PushTokenFront(((nsCParserNode*)aNode)->PopAttributeToken()); - } - - theToken=mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_select); - // Increament the count because of the additional attribute from the form processor. - theToken->SetAttributeCount(theAttrCount+1); - mTokenizer->PushTokenFront(theToken); - }//if(mTokenizer && mTokenAllocator) - }//if(NS_SUCCEEDED(result)) - }// if(NS_SUCCEEDED(result)) - } //if(aNode) - return result; -} - - -/** - * This method gets called when a start token has been - * encountered in the parse process. If the current container - * can contain this tag, then add it. Otherwise, you have - * two choices: 1) create an implicit container for this tag - * to be stored in - * 2) close the top container, and add this to - * whatever container ends up on top. - * - * @update gess 1/04/99 - * @param aToken -- next (start) token to be handled - * @param aNode -- CParserNode representing this start token - * @return PR_TRUE if all went well; PR_FALSE if error occured - */ -nsresult CNavDTD::HandleStartToken(CToken* aToken) { - NS_PRECONDITION(0!=aToken,kNullToken); - - //Begin by gathering up attributes... - - nsCParserNode* theNode=mNodeAllocator.CreateNode(aToken, mTokenAllocator); - - eHTMLTags theChildTag=(eHTMLTags)aToken->GetTypeID(); - PRInt16 attrCount=aToken->GetAttributeCount(); - eHTMLTags theParent=mBodyContext->Last(); - nsresult result=(0==attrCount) ? 
NS_OK : CollectAttributes(*theNode,theChildTag,attrCount); - - if(NS_OK==result) { - result=WillHandleStartTag(aToken,theChildTag,*theNode); - if(NS_OK==result) { - PRBool isTokenHandled =PR_FALSE; - PRBool theHeadIsParent=PR_FALSE; - - if(nsHTMLElement::IsSectionTag(theChildTag)){ - switch(theChildTag){ - case eHTMLTag_html: - if(mBodyContext->GetCount()>0) { - result=OpenContainer(theNode,theChildTag,PR_FALSE); - isTokenHandled=PR_TRUE; - } - break; - case eHTMLTag_body: - if(mFlags & NS_DTD_FLAG_HAS_OPEN_BODY) { - result=OpenContainer(theNode,theChildTag,PR_FALSE); - isTokenHandled=PR_TRUE; - } - break; - case eHTMLTag_head: - if(mFlags & (NS_DTD_FLAG_HAD_BODY | NS_DTD_FLAG_HAD_FRAMESET)) { - result=HandleOmittedTag(aToken,theChildTag,theParent,theNode); - isTokenHandled=PR_TRUE; - } - break; - default: - break; - } - } - - PRBool theExclusive=PR_FALSE; - theHeadIsParent=nsHTMLElement::IsChildOfHead(theChildTag,theExclusive); - - switch(theChildTag) { - case eHTMLTag_area: - if(!mOpenMapCount) isTokenHandled=PR_TRUE; - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::HandleStartToken(), this=%p\n", this)); - - if (mOpenMapCount>0 && mSink) { - result=mSink->AddLeaf(*theNode); - isTokenHandled=PR_TRUE; - } - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::HandleStartToken(), this=%p\n", this)); - START_TIMER(); - break; - - case eHTMLTag_image: - aToken->SetTypeID(theChildTag=eHTMLTag_img); - break; - - case eHTMLTag_keygen: - result=HandleKeyGen(theNode); - isTokenHandled=PR_TRUE; - break; - - case eHTMLTag_script: - theHeadIsParent = !(mFlags & NS_DTD_FLAG_HAS_OPEN_BODY); - mFlags |= NS_DTD_FLAG_HAS_OPEN_SCRIPT; - - default: - break; - }//switch - - if(!isTokenHandled) { - if(theHeadIsParent || ((mFlags & NS_DTD_FLAG_HAS_OPEN_HEAD) && - (eHTMLTag_newline == theChildTag || - eHTMLTag_whitespace == theChildTag || - eHTMLTag_userdefined == theChildTag))) { - result = AddHeadLeaf(theNode); - } - else - result = 
HandleDefaultStartToken(aToken,theChildTag,theNode); - } - - //now do any post processing necessary on the tag... - if(NS_OK==result) - DidHandleStartTag(*theNode,theChildTag); - }//if - } //if - - if(kHierarchyTooDeep==result) { - //reset this error to ok; all that happens here is that given inline tag - //gets dropped because the stack is too deep. Don't terminate parsing. - result=NS_OK; - } - - IF_FREE(theNode, &mNodeAllocator); - return result; -} - -/** - * Call this to see if you have a closeable peer on the stack that - * is ABOVE one of its root tags. - * - * @update gess 4/11/99 - * @param aRootTagList -- list of root tags for aTag - * @param aTag -- tag to test for containership - * @return PR_TRUE if given tag can contain other tags - */ -static -PRBool HasCloseablePeerAboveRoot(const TagList& aRootTagList,nsDTDContext& aContext,eHTMLTags aTag,PRBool anEndTag) { - PRInt32 theRootIndex=LastOf(aContext,aRootTagList); - const TagList* theCloseTags=(anEndTag) ? gHTMLElements[aTag].GetAutoCloseEndTags() : gHTMLElements[aTag].GetAutoCloseStartTags(); - PRInt32 theChildIndex=-1; - - if(theCloseTags) { - theChildIndex=LastOf(aContext,*theCloseTags); - } - else { - if((anEndTag) || (!gHTMLElements[aTag].CanContainSelf())) - theChildIndex=aContext.LastOf(aTag); - } - // I changed this to theRootIndex<=theChildIndex so to handle this case: - // <SELECT><OPTGROUP>...</OPTGROUP> - // - return PRBool(theRootIndex<=theChildIndex); -} - - -/** - * This method is called to determine whether or not an END tag - * can be autoclosed. This means that based on the current - * context, the stack should be closed to the nearest matching - * tag. 
- * - * @param aTag -- tag enum of child to be tested - * @return PR_TRUE if autoclosure should occur - */ -static -eHTMLTags FindAutoCloseTargetForEndTag(eHTMLTags aCurrentTag,nsDTDContext& aContext,nsDTDMode aMode) { - int theTopIndex=aContext.GetCount(); - eHTMLTags thePrevTag=aContext.Last(); - - if(nsHTMLElement::IsContainer(aCurrentTag)){ - PRInt32 theChildIndex=GetIndexOfChildOrSynonym(aContext,aCurrentTag); - - if(kNotFound<theChildIndex) { - if(thePrevTag==aContext[theChildIndex]){ - return aContext[theChildIndex]; - } - - if(nsHTMLElement::IsBlockCloser(aCurrentTag)) { - - /*here's what to do: - Our here is sitting at aChildIndex. There are other tags above it - on the stack. We have to try to close them out, but we may encounter - one that can block us. The way to tell is by comparing each tag on - the stack against our closeTag and rootTag list. - - For each tag above our hero on the stack, ask 3 questions: - 1. Is it in the closeTag list? If so, the we can skip over it - 2. Is it in the rootTag list? If so, then we're gated by it - 3. Otherwise its non-specified and we simply presume we can close it. - */ - - const TagList* theCloseTags=gHTMLElements[aCurrentTag].GetAutoCloseEndTags(); - const TagList* theRootTags=gHTMLElements[aCurrentTag].GetEndRootTags(); - - if(theCloseTags){ - //at a min., this code is needed for H1..H6 - - while(theChildIndex<--theTopIndex) { - eHTMLTags theNextTag=aContext[theTopIndex]; - if(PR_FALSE==FindTagInSet(theNextTag,theCloseTags->mTags,theCloseTags->mCount)) { - if(PR_TRUE==FindTagInSet(theNextTag,theRootTags->mTags,theRootTags->mCount)) { - return eHTMLTag_unknown; //we encountered a tag in root list so fail (because we're gated). - } - //otherwise presume it's something we can simply ignore and continue search... - } - //otherwise its in the close list so skip to next tag... - } - eHTMLTags theTarget=aContext.TagAt(theChildIndex); - if(aCurrentTag!=theTarget) { - aCurrentTag=theTarget; //use the synonym. 
- } - return aCurrentTag; //if you make it here, we're ungated and found a target! - }//if - else if(theRootTags) { - //since we didn't find any close tags, see if there is an instance of aCurrentTag - //above the stack from the roottag. - if(HasCloseablePeerAboveRoot(*theRootTags,aContext,aCurrentTag,PR_TRUE)) - return aCurrentTag; - else return eHTMLTag_unknown; - } - } //if blockcloser - else{ - //Ok, a much more sensible approach for non-block closers; use the tag group to determine closure: - //For example: %phrasal closes %phrasal, %fontstyle and %special - return gHTMLElements[aCurrentTag].GetCloseTargetForEndTag(aContext,theChildIndex,aMode); - } - }//if - } //if - return eHTMLTag_unknown; -} - -/** - * - * @param - * @param - * @update gess 10/11/99 - * @return nada - */ -static void StripWSFollowingTag(eHTMLTags aChildTag,nsITokenizer* aTokenizer,nsTokenAllocator* aTokenAllocator, PRInt32& aNewlineCount){ - CToken* theToken= (aTokenizer)? aTokenizer->PeekToken():nsnull; - - if(aTokenAllocator) { - while(theToken) { - eHTMLTokenTypes theType=eHTMLTokenTypes(theToken->GetTokenType()); - switch(theType) { - case eToken_newline: ++aNewlineCount; - case eToken_whitespace: - theToken=aTokenizer->PopToken(); - IF_FREE(theToken, aTokenAllocator); - theToken=aTokenizer->PeekToken(); - break; - default: - theToken=0; - break; - } - } - } -} - -/** - * This method gets called when an end token has been - * encountered in the parse process. If the end tag matches - * the start tag on the stack, then simply close it. Otherwise, - * we have a erroneous state condition. This can be because we - * have a close tag with no prior open tag (user error) or because - * we screwed something up in the parse process. I'm not sure - * yet how to tell the difference. 
- * - * @update gess 3/25/98 - * @param aToken -- next (start) token to be handled - * @return PR_TRUE if all went well; PR_FALSE if error occured - */ -nsresult CNavDTD::HandleEndToken(CToken* aToken) { - NS_PRECONDITION(0!=aToken,kNullToken); - - nsresult result=NS_OK; - eHTMLTags theChildTag=(eHTMLTags)aToken->GetTypeID(); - - switch(theChildTag) { - - case eHTMLTag_script: - mFlags &= ~NS_DTD_FLAG_HAS_OPEN_SCRIPT; - case eHTMLTag_style: - case eHTMLTag_link: - case eHTMLTag_meta: - case eHTMLTag_textarea: - case eHTMLTag_title: - break; - - case eHTMLTag_head: - StripWSFollowingTag(theChildTag,mTokenizer, mTokenAllocator, mLineNumber); - result = CloseContainer(eHTMLTag_head, theChildTag, PR_FALSE); - break; - - case eHTMLTag_form: - result = CloseContainer(eHTMLTag_form, theChildTag, PR_FALSE); - break; - - case eHTMLTag_br: - { - //This is special NAV-QUIRKS code that allows users - //to use </BR>, even though that isn't a legitimate tag. - if(eDTDMode_quirks==mDTDMode) { - // Use recycler and pass the token thro' HandleToken() to fix bugs like 32782. - CHTMLToken* theToken = NS_STATIC_CAST(CHTMLToken*,mTokenAllocator->CreateTokenOfType(eToken_start,theChildTag)); - result=HandleToken(theToken,mParser); - } - } - break; - - case eHTMLTag_body: - case eHTMLTag_html: - StripWSFollowingTag(theChildTag,mTokenizer,mTokenAllocator,mLineNumber); - break; - - default: - { - //now check to see if this token should be omitted, or - //if it's gated from closing by the presence of another tag. - if(gHTMLElements[theChildTag].CanOmitEndTag()) { - PopStyle(theChildTag); - } - else { - eHTMLTags theParentTag=mBodyContext->Last(); - - if(kNotFound==GetIndexOfChildOrSynonym(*mBodyContext,theChildTag)) { - - // Ref: bug 30487 - // Make sure that we don't cross boundaries, of certain elements, - // to close stylistic information. - // Ex. <font face="helvetica"><table><tr><td></font></td></tr></table> some text... - // In the above ex. 
the orphaned FONT tag, inside TD, should cross TD boundaryto - // close the FONT tag above TABLE. - static eHTMLTags gBarriers[]={eHTMLTag_thead,eHTMLTag_tbody,eHTMLTag_tfoot,eHTMLTag_table}; - - if(!FindTagInSet(theParentTag,gBarriers,sizeof(gBarriers)/sizeof(theParentTag))) { - if(nsHTMLElement::IsResidualStyleTag(theChildTag)) { - mBodyContext->RemoveStyle(theChildTag); // fix bug 77746 - } - } - - // If the bit kHandleStrayTag is set then we automatically open up a matching - // start tag ( compatibility ). Currently this bit is set on P tag. - // This also fixes Bug: 22623 - if(gHTMLElements[theChildTag].HasSpecialProperty(kHandleStrayTag) && - mDTDMode != eDTDMode_full_standards && - mDTDMode != eDTDMode_almost_standards) { - // Oh boy!! we found a "stray" tag. Nav4.x and IE introduce line break in - // such cases. So, let's simulate that effect for compatibility. - // Ex. <html><body>Hello</P>There</body></html> - PRBool theParentContains=-1; //set to -1 to force canomit to recompute. - if(!CanOmit(theParentTag,theChildTag,theParentContains)) { - IF_HOLD(aToken); - mTokenizer->PushTokenFront(aToken); //put this end token back... - CHTMLToken* theToken = NS_STATIC_CAST(CHTMLToken*,mTokenAllocator->CreateTokenOfType(eToken_start,theChildTag)); - mTokenizer->PushTokenFront(theToken); //put this new token onto stack... - } - } - return result; - } - if(result==NS_OK) { - eHTMLTags theTarget=FindAutoCloseTargetForEndTag(theChildTag,*mBodyContext,mDTDMode); - if(eHTMLTag_unknown!=theTarget) { - if (nsHTMLElement::IsResidualStyleTag(theChildTag)) { - result=OpenTransientStyles(theChildTag); - if(NS_FAILED(result)) { - return result; - } - } - result=CloseContainersTo(theTarget,PR_FALSE); - } - } - } - } - break; - } - - return result; -} - -/** - * This method will be triggered when the end of a table is - * encountered. Its primary purpose is to process all the - * bad-contents pertaining a particular table. The position - * of the table is the token bank ID. 
- * - * @update harishd 03/24/99 - * @param aTag - This ought to be a table tag - * - */ -nsresult CNavDTD::HandleSavedTokens(PRInt32 anIndex) { - NS_PRECONDITION(mBodyContext != nsnull && mBodyContext->GetCount() > 0,"invalid context"); - - nsresult result = NS_OK; - - if(anIndex>kNotFound) { - PRInt32 theBadTokenCount = mMisplacedContent.GetSize(); - - if(theBadTokenCount > 0) { - - if(mTempContext==nsnull) mTempContext=new nsDTDContext(); - - CToken* theToken; - eHTMLTags theTag; - PRInt32 attrCount; - PRInt32 theTopIndex = anIndex + 1; - PRInt32 theTagCount = mBodyContext->GetCount(); - - if (mSink && mSink->IsFormOnStack()) { - // Do this to synchronize dtd stack and the sink stack. - // Note: FORM is never on the dtd stack because its always - // considered as a leaf. However, in the sink FORM can either - // be a container or a leaf. Therefore, we have to check - // with the sink -- Ref: Bug 20087. - ++anIndex; - } - - STOP_TIMER() - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::HandleSavedTokensAbove(), this=%p\n", this)); - // Pause the main context and switch to the new context. - mSink->BeginContext(anIndex); - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::HandleSavedTokensAbove(), this=%p\n", this)); - START_TIMER() - - // The body context should contain contents only upto the marked position. - mBodyContext->MoveEntries(*mTempContext, theTagCount - theTopIndex); - - // Now flush out all the bad contents. - while(theBadTokenCount-- > 0){ - theToken=(CToken*)mMisplacedContent.PopFront(); - if(theToken) { - theTag = (eHTMLTags)theToken->GetTypeID(); - attrCount = (gHTMLElements[theTag].mSkipTarget)? 
0:theToken->GetAttributeCount(); - // Put back attributes, which once got popped out, into the tokenizer - for(PRInt32 j=0;j<attrCount; ++j){ - CToken* theAttrToken = (CToken*)mMisplacedContent.PopFront(); - if(theAttrToken) { - mTokenizer->PushTokenFront(theAttrToken); - } - theBadTokenCount--; - } - - if(eToken_end==theToken->GetTokenType()) { - // Ref: Bug 25202 - // Make sure that the BeginContext() is ended only by the call to - // EndContext(). Ex: <center><table><a></center>. - // In the Ex. above </center> should not close <center> above table. - // Doing so will cause the current context to get closed prematurely. - PRInt32 theIndex=mBodyContext->LastOf(theTag); - - if(theIndex!=kNotFound && theIndex<=mBodyContext->mContextTopIndex) { - IF_FREE(theToken, mTokenAllocator); - continue; - } - } - result=HandleToken(theToken,mParser); - } - }//while - if(theTopIndex != mBodyContext->GetCount()) { - CloseContainersTo(theTopIndex,mBodyContext->TagAt(theTopIndex),PR_TRUE); - } - - // Bad-contents were successfully processed. Now, itz time to get - // back to the original body context state. - mTempContext->MoveEntries(*mBodyContext, theTagCount - theTopIndex); - - STOP_TIMER() - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::HandleSavedTokensAbove(), this=%p\n", this)); - // Terminate the new context and switch back to the main context - mSink->EndContext(anIndex); - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::HandleSavedTokensAbove(), this=%p\n", this)); - START_TIMER() - } - } - return result; -} - - -/** - * This method gets called when an entity token has been - * encountered in the parse process. 
- * - * @update gess 3/25/98 - * @param aToken -- next (start) token to be handled - * @return PR_TRUE if all went well; PR_FALSE if error occured - */ -nsresult CNavDTD::HandleEntityToken(CToken* aToken) { - NS_PRECONDITION(0!=aToken,kNullToken); - - nsresult result=NS_OK; - - const nsAString& theStr = aToken->GetStringValue(); - - if((kHashsign!=theStr.First()) && (-1==nsHTMLEntities::EntityToUnicode(theStr))){ - CToken *theToken=0; -#ifdef DEBUG - //before we just toss this away as a bogus entity, let's check... - CNamedEntity *theEntity=mBodyContext->GetEntity(theStr); - if(theEntity) { - theToken = NS_STATIC_CAST(CTextToken*,mTokenAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text,theEntity->mValue)); - } - else { -#endif - //if you're here we have a bogus entity. - //convert it into a text token. - nsAutoString entityName; - entityName.Assign(NS_LITERAL_STRING("&")); - entityName.Append(theStr); //should append the entity name; fix bug 51161. - theToken = mTokenAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text,entityName); -#ifdef DEBUG - } -#endif - return HandleToken(theToken,mParser); //theToken should get recycled automagically... - } - - eHTMLTags theParentTag=mBodyContext->Last(); - - nsCParserNode* theNode=mNodeAllocator.CreateNode(aToken, mTokenAllocator); - if(theNode) { - PRBool theParentContains=-1; //set to -1 to force CanOmit to recompute... - if(CanOmit(theParentTag,eHTMLTag_entity,theParentContains)) { - eHTMLTags theCurrTag=(eHTMLTags)aToken->GetTypeID(); - result=HandleOmittedTag(aToken,theCurrTag,theParentTag,theNode); - } - else { - result=AddLeaf(theNode); - } - IF_FREE(theNode, &mNodeAllocator); - } - return result; -} - -/** - * This method gets called when a comment token has been - * encountered in the parse process. After making sure - * we're somewhere in the body, we handle the comment - * in the same code that we use for text. 
- * - * @update gess 3/25/98 - * @param aToken -- next (start) token to be handled - * @return PR_TRUE if all went well; PR_FALSE if error occured - */ -nsresult CNavDTD::HandleCommentToken(CToken* aToken) { - NS_PRECONDITION(0!=aToken,kNullToken); - - nsresult result=NS_OK; - - nsCParserNode* theNode=mNodeAllocator.CreateNode(aToken, mTokenAllocator); - - if(theNode) { - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::HandleCommentToken(), this=%p\n", this)); - - result=(mSink) ? mSink->AddComment(*theNode) : NS_OK; - - IF_FREE(theNode, &mNodeAllocator); - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::HandleCommentToken(), this=%p\n", this)); - START_TIMER(); - } - - return result; -} - - -/** - * This method gets called when an attribute token has been - * encountered in the parse process. This is an error, since - * all attributes should have been accounted for in the prior - * start or end tokens - * - * @update gess 3/25/98 - * @param aToken -- next (start) token to be handled - * @return PR_TRUE if all went well; PR_FALSE if error occured - */ -nsresult CNavDTD::HandleAttributeToken(CToken* aToken) { - NS_PRECONDITION(0!=aToken,kNullToken); - NS_ERROR("attribute encountered -- this shouldn't happen unless the attribute was not part of a start tag!"); - - return NS_OK; -} - -/** - * This method gets called when a script token has been - * encountered in the parse process. 
n - * - * @update gess 3/25/98 - * @param aToken -- next (start) token to be handled - * @return PR_TRUE if all went well; PR_FALSE if error occured - */ -nsresult CNavDTD::HandleScriptToken(const nsIParserNode *aNode) { - // PRInt32 attrCount=aNode.GetAttributeCount(PR_TRUE); - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::HandleScriptToken(), this=%p\n", this)); - - nsresult result=AddLeaf(aNode); - - mParser->SetCanInterrupt(PR_FALSE); - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::HandleScriptToken(), this=%p\n", this)); - START_TIMER(); - - return result; -} - - -/** - * This method gets called when an "instruction" token has been - * encountered in the parse process. - * - * @update gess 3/25/98 - * @param aToken -- next (start) token to be handled - * @return PR_TRUE if all went well; PR_FALSE if error occured - */ -nsresult CNavDTD::HandleProcessingInstructionToken(CToken* aToken){ - NS_PRECONDITION(0!=aToken,kNullToken); - - nsresult result=NS_OK; - - nsCParserNode* theNode=mNodeAllocator.CreateNode(aToken, mTokenAllocator); - if(theNode) { - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::HandleProcessingInstructionToken(), this=%p\n", this)); - - result=(mSink) ? mSink->AddProcessingInstruction(*theNode) : NS_OK; - - IF_FREE(theNode, &mNodeAllocator); - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::HandleProcessingInstructionToken(), this=%p\n", this)); - START_TIMER(); - - } - return result; -} - -/** - * This method gets called when a DOCTYPE token has been - * encountered in the parse process. 
- * - * @update harishd 09/02/99 - * @param aToken -- The very first token to be handled - * @return PR_TRUE if all went well; PR_FALSE if error occured - */ -nsresult CNavDTD::HandleDocTypeDeclToken(CToken* aToken){ - NS_PRECONDITION(0!=aToken,kNullToken); - - nsresult result=NS_OK; - - CDoctypeDeclToken* theToken = NS_STATIC_CAST(CDoctypeDeclToken*,aToken); - nsAutoString docTypeStr(theToken->GetStringValue()); - mLineNumber += docTypeStr.CountChar(kNewLine); - - PRInt32 len=docTypeStr.Length(); - PRInt32 pos=docTypeStr.RFindChar(kGreaterThan); - if(pos>-1) { - docTypeStr.Cut(pos,len-pos);// First remove '>' from the end. - } - docTypeStr.Cut(0,2); // Now remove "<!" from the begining - theToken->SetStringValue(docTypeStr); - - nsCParserNode* theNode=mNodeAllocator.CreateNode(aToken, mTokenAllocator); - if(theNode) { - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::HandleDocTypeDeclToken(), this=%p\n", this)); - - result = (mSink)? mSink->AddDocTypeDecl(*theNode):NS_OK; - - IF_FREE(theNode, &mNodeAllocator); - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::HandleDocTypeDeclToken(), this=%p\n", this)); - START_TIMER(); - - } - return result; -} - -/** - * Retrieve the attributes for this node, and add then into - * the node. 
- * - * @update gess4/22/98 - * @param aNode is the node you want to collect attributes for - * @param aCount is the # of attributes you're expecting - * @return error code (should be 0) - */ -nsresult CNavDTD::CollectAttributes(nsIParserNode& aNode,eHTMLTags aTag,PRInt32 aCount){ - int attr=0; - - nsresult result=NS_OK; - int theAvailTokenCount=mTokenizer->GetCount() + mSkippedContent.GetSize(); - if(aCount<=theAvailTokenCount) { - CToken* theToken=0; - eHTMLTags theSkipTarget=gHTMLElements[aTag].mSkipTarget; - for(attr=0;attr<aCount;++attr){ - if((eHTMLTag_unknown!=theSkipTarget) && mSkippedContent.GetSize()) - theToken=NS_STATIC_CAST(CToken*,mSkippedContent.PopFront()); - else - theToken=mTokenizer->PopToken(); - if(theToken) { - eHTMLTokenTypes theType=eHTMLTokenTypes(theToken->GetTokenType()); - if(theType!=eToken_attribute) { - // If you're here then it means that the token does not - // belong to this node. Put the token back into the tokenizer - // and let it go thro' the regular path. Bug: 59189. - mTokenizer->PushTokenFront(theToken); - break; - } - // Sanitize the key for it might contain some non-alpha-non-digit characters - // at its end. Ex. <OPTION SELECTED/> - This will be tokenized as "<" "OPTION", - // "SELECTED/", and ">". In this case the "SELECTED/" key will be sanitized to - // a legitimate "SELECTED" key. - ((CAttributeToken*)theToken)->SanitizeKey(); - mLineNumber += theToken->GetNewlineCount(); - - aNode.AddAttribute(theToken); - } - } - } - else { - result=kEOF; - } - return result; -} - - -/** - * Causes the next skipped-content token (if any) to - * be consumed by this node. - * - * @update gess 4Sep2000 - * @param node to consume skipped-content - * @param holds the number of skipped content elements encountered - * @return Error condition. 
- */ -NS_IMETHODIMP -CNavDTD::CollectSkippedContent(PRInt32 aTag, nsAString& aContent, PRInt32 &aLineNo) { - - NS_ASSERTION(aTag >= eHTMLTag_unknown && aTag <= NS_HTML_TAG_MAX, "tag array out of bounds"); - - aContent.Truncate(); - - NS_ASSERTION(eHTMLTag_unknown != gHTMLElements[aTag].mSkipTarget, "cannot collect content for this tag"); - if (eHTMLTag_unknown == gHTMLElements[aTag].mSkipTarget) { - // This tag doesn't support skipped content. - aLineNo = -1; - return NS_OK; - } - - aLineNo = mLineNumber; - mScratch.Truncate(); - PRInt32 i = 0; - PRInt32 tagCount = mSkippedContent.GetSize(); - for (i = 0; i< tagCount; ++i){ - CHTMLToken* theNextToken = (CHTMLToken*)mSkippedContent.PopFront(); - - if (theNextToken) { - eHTMLTokenTypes theTokenType = (eHTMLTokenTypes)theNextToken->GetTokenType(); - - // Dont worry about attributes here because it's already stored in - // the start token as mTrailing content and will get appended in - // start token's GetSource(); - if (eToken_attribute!=theTokenType) { - if ((eToken_entity==theTokenType) && - ((eHTMLTag_textarea == aTag) || (eHTMLTag_title == aTag))) { - mScratch.Truncate(); - ((CEntityToken*)theNextToken)->TranslateToUnicodeStr(mScratch); - if (!mScratch.IsEmpty()){ - aContent.Append(mScratch); - } - else { - // We thought it was an entity but it is not! - bug 79492 - aContent.Append(PRUnichar('&')); - aContent.Append(theNextToken->GetStringValue()); - } - } - else theNextToken->AppendSourceTo(aContent); - } - } - IF_FREE(theNextToken, mTokenAllocator); - } - - InPlaceConvertLineEndings(aContent); - - // Note: TEXTAREA content is PCDATA and hence the newlines are already accounted for. - mLineNumber += (aTag != eHTMLTag_textarea) ? aContent.CountChar(kNewLine) : 0; - - return NS_OK; -} - - /*********************************************************************************** - The preceeding tables determine the set of elements each tag can contain... 
- ***********************************************************************************/ - -/** - * This method is called to determine whether or not a tag - * of one type can contain a tag of another type. - * - * @update gess 4/8/98 - * @param aParent -- tag enum of parent container - * @param aChild -- tag enum of child container - * @return PR_TRUE if parent can contain child - */ -PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) const -{ - PRBool result=gHTMLElements[aParent].CanContain((eHTMLTags)aChild,mDTDMode); - -#ifdef ALLOW_TR_AS_CHILD_OF_TABLE - if(!result) { - //XXX This vile hack is here to support bug 30378, which allows - //table to contain tr directly in an html32 document. - if((eHTMLTag_tr==aChild) && (eHTMLTag_table==aParent)) { - result=PR_TRUE; - } - } -#endif - - if(eHTMLTag_nobr==aChild) { - if(IsInlineElement(aParent,aParent)){ - if(HasOpenContainer((eHTMLTags)aChild)) { - return PR_FALSE; - } - } - } - - return result; -} - -/** - * Give rest of world access to our tag enums, so that CanContain(), etc, - * become useful. - */ -NS_IMETHODIMP CNavDTD::StringTagToIntTag(const nsAString &aTag, - PRInt32* aIntTag) const -{ - *aIntTag = nsHTMLTags::LookupTag(aTag); - - return NS_OK; -} - -NS_IMETHODIMP_(const PRUnichar *) -CNavDTD::IntTagToStringTag(PRInt32 aIntTag) const -{ - const PRUnichar *str_ptr = nsHTMLTags::GetStringValue((nsHTMLTag)aIntTag); - - NS_ASSERTION(str_ptr, "Bad tag enum passed to CNavDTD::IntTagToStringTag()" - "!!"); - - return str_ptr; -} - -NS_IMETHODIMP_(nsIAtom *) -CNavDTD::IntTagToAtom(PRInt32 aIntTag) const -{ - nsIAtom *atom = nsHTMLTags::GetAtom((nsHTMLTag)aIntTag); - - NS_ASSERTION(atom, "Bad tag enum passed to CNavDTD::IntTagToAtom()" - "!!"); - - return atom; -} - -/** - * This method is called to determine whether or not - * the given childtag is a block element. 
- * - * @update gess 6June2000 - * @param aChildID -- tag id of child - * @param aParentID -- tag id of parent (or eHTMLTag_unknown) - * @return PR_TRUE if this tag is a block tag - */ -PRBool CNavDTD::IsBlockElement(PRInt32 aTagID,PRInt32 aParentID) const { - PRBool result=PR_FALSE; - eHTMLTags theTag=(eHTMLTags)aTagID; - - if((theTag>eHTMLTag_unknown) && (theTag<eHTMLTag_userdefined)) { - result=((gHTMLElements[theTag].IsMemberOf(kBlock)) || - (gHTMLElements[theTag].IsMemberOf(kBlockEntity)) || - (gHTMLElements[theTag].IsMemberOf(kHeading)) || - (gHTMLElements[theTag].IsMemberOf(kPreformatted))|| - (gHTMLElements[theTag].IsMemberOf(kList))); - } - - return result; -} - -/** - * This method is called to determine whether or not - * the given childtag is an inline element. - * - * @update gess 6June2000 - * @param aChildID -- tag id of child - * @param aParentID -- tag id of parent (or eHTMLTag_unknown) - * @return PR_TRUE if this tag is an inline tag - */ -PRBool CNavDTD::IsInlineElement(PRInt32 aTagID,PRInt32 aParentID) const { - PRBool result=PR_FALSE; - eHTMLTags theTag=(eHTMLTags)aTagID; - - if((theTag>eHTMLTag_unknown) && (theTag<eHTMLTag_userdefined)) { - result=((gHTMLElements[theTag].IsMemberOf(kInlineEntity))|| - (gHTMLElements[theTag].IsMemberOf(kFontStyle)) || - (gHTMLElements[theTag].IsMemberOf(kPhrase)) || - (gHTMLElements[theTag].IsMemberOf(kSpecial)) || - (gHTMLElements[theTag].IsMemberOf(kFormControl))); - } - - return result; -} - -/** - * This method is called to determine whether or not - * the necessary intermediate tags should be propagated - * between the given parent and given child. - * - * @update gess 4/8/98 - * @param aParent -- tag enum of parent container - * @param aChild -- tag enum of child container - * @return PR_TRUE if propagation should occur - */ -PRBool CNavDTD::CanPropagate(eHTMLTags aParent,eHTMLTags aChild,PRBool aParentContains) { - PRBool result=PR_FALSE; - PRBool theParentContains=(-1==aParentContains) ? 
CanContain(aParent,aChild) : aParentContains; - - if(aParent==aChild) { - return result; - } - - if(nsHTMLElement::IsContainer(aChild)){ - mScratch.Truncate(); - if(!gHTMLElements[aChild].HasSpecialProperty(kNoPropagate)){ - if(nsHTMLElement::IsBlockParent(aParent) || (gHTMLElements[aParent].GetSpecialChildren())) { - - result=ForwardPropagate(mScratch,aParent,aChild); - - if(PR_FALSE==result){ - - if(eHTMLTag_unknown!=aParent) { - if(aParent!=aChild) //dont even bother if we're already inside a similar element... - result=BackwardPropagate(mScratch,aParent,aChild); - } //if - else result=BackwardPropagate(mScratch,eHTMLTag_html,aChild); - - } //elseif - - }//if - }//if - if(mScratch.Length()-1>gHTMLElements[aParent].mPropagateRange) - result=PR_FALSE; - }//if - else result=theParentContains; - - - return result; -} - - -/** - * This method gets called to determine whether a given - * tag can be omitted from opening. Most cannot. - * - * @update gess 3/25/98 - * @param aParent - * @param aChild - * @param aParentContains - * @return PR_TRUE if given tag can contain other tags - */ -PRBool CNavDTD::CanOmit(eHTMLTags aParent,eHTMLTags aChild,PRBool& aParentContains) { - - eHTMLTags theAncestor=gHTMLElements[aChild].mExcludingAncestor; - if (eHTMLTag_unknown!=theAncestor){ - if (HasOpenContainer(theAncestor)) { - return PR_TRUE; - } - } - - theAncestor=gHTMLElements[aChild].mRequiredAncestor; - if(eHTMLTag_unknown!=theAncestor){ - if(!HasOpenContainer(theAncestor)) { - if(!CanPropagate(aParent,aChild,aParentContains)) { - return PR_TRUE; - } - } - return PR_FALSE; - } - - - if(gHTMLElements[aParent].CanExclude(aChild)){ - return PR_TRUE; - } - - //Now the obvious test: if the parent can contain the child, don't omit. 
- if(-1==aParentContains) - aParentContains=CanContain(aParent,aChild); - - if(aParentContains || (aChild==aParent)){ - return PR_FALSE; - } - - if(gHTMLElements[aParent].IsBlockEntity()) { - if(nsHTMLElement::IsInlineEntity(aChild)) { //feel free to drop inlines that a block doesn't contain. - return PR_TRUE; - } - } - - if(gHTMLElements[aParent].HasSpecialProperty(kBadContentWatch)) { - - if(-1==aParentContains) { - //we need to compute parent containment here, since it wasn't given... - if(!gHTMLElements[aParent].CanContain(aChild,mDTDMode)){ - return PR_TRUE; - } - } - else if (!aParentContains) { - if(!gHTMLElements[aChild].HasSpecialProperty(kBadContentWatch)) { - return PR_TRUE; - } - return PR_FALSE; // Ref. Bug 25658 - } - } - - if(gHTMLElements[aParent].HasSpecialProperty(kSaveMisplaced)) { - return PR_TRUE; - } - - return PR_FALSE; -} - - -/** - * This method gets called to determine whether a given - * tag is itself a container - * - * @update gess 4/8/98 - * @param aTag -- tag to test as a container - * @return PR_TRUE if given tag can contain other tags - */ -PRBool CNavDTD::IsContainer(PRInt32 aTag) const { - return nsHTMLElement::IsContainer((eHTMLTags)aTag); -} - - -/** - * This method tries to design a context vector (without actually - * changing our parser state) from the parent down to the - * child. - * - * @update gess4/6/98 - * @param aVector is the string where we store our output vector - * in bottom-up order. - * @param aParent -- tag type of parent - * @param aChild -- tag type of child - * @return TRUE if propagation closes; false otherwise - */ -PRBool CNavDTD::ForwardPropagate(nsString& aSequence,eHTMLTags aParent,eHTMLTags aChild) { - PRBool result=PR_FALSE; - - switch(aParent) { - case eHTMLTag_table: - { - if((eHTMLTag_tr==aChild) || (eHTMLTag_td==aChild)) { - return BackwardPropagate(aSequence,aParent,aChild); - } - } - //otherwise, intentionally fall through... 
- - case eHTMLTag_tr: - { - PRBool theCanContainResult=CanContain(eHTMLTag_td,aChild); - if(PR_TRUE==theCanContainResult) { - aSequence.Append((PRUnichar)eHTMLTag_td); - result=BackwardPropagate(aSequence,aParent,eHTMLTag_td); - } - } - break; - - case eHTMLTag_th: - break; - - default: - break; - }//switch - return result; -} - - -/** - * This method tries to design a context map (without actually - * changing our parser state) from the child up to the parent. - * - * @update gess4/6/98 - * @param aVector is the string where we store our output vector - * in bottom-up order. - * @param aParent -- tag type of parent - * @param aChild -- tag type of child - * @return TRUE if propagation closes; false otherwise - */ -PRBool CNavDTD::BackwardPropagate(nsString& aSequence,eHTMLTags aParent,eHTMLTags aChild) const { - - eHTMLTags theParent=aParent; //just init to get past first condition... - - do { - const TagList* theRootTags=gHTMLElements[aChild].GetRootTags(); - if(theRootTags) { - theParent=theRootTags->mTags[0]; - if(CanContain(theParent,aChild)) { - //we've found a complete sequence, so push the parent... 
- aChild=theParent; - aSequence.Append((PRUnichar)theParent); - } - } - else break; - } - while((theParent!=eHTMLTag_unknown) && (theParent!=aParent)); - - return PRBool(aParent==theParent); -} - - -/** - * This method allows the caller to determine if a given container - * is currently open - * - * @update gess 11/9/98 - * @param - * @return - */ -PRBool CNavDTD::HasOpenContainer(eHTMLTags aContainer) const { - PRBool result=PR_FALSE; - - switch(aContainer) { - case eHTMLTag_form: - result= !(~mFlags & NS_DTD_FLAG_HAS_OPEN_FORM); break; - case eHTMLTag_map: - result=mOpenMapCount>0; break; - default: - result=mBodyContext->HasOpenContainer(aContainer); - break; - } - return result; -} - -/** - * This method allows the caller to determine if a any member - * in a set of tags is currently open - * - * @update gess 11/9/98 - * @param - * @return - */ -PRBool CNavDTD::HasOpenContainer(const eHTMLTags aTagSet[],PRInt32 aCount) const { - - int theIndex; - int theTopIndex=mBodyContext->GetCount()-1; - - for(theIndex=theTopIndex;theIndex>0;theIndex--){ - if(FindTagInSet((*mBodyContext)[theIndex],aTagSet,aCount)) - return PR_TRUE; - } - return PR_FALSE; -} - -/** - * This method retrieves the HTMLTag type of the topmost - * container on the stack. - * - * @update gess 4/2/98 - * @return tag id of topmost node in contextstack - */ -eHTMLTags CNavDTD::GetTopNode() const { - return mBodyContext->Last(); -} - -/********************************************* - Here comes code that handles the interface - to our content sink. - *********************************************/ - - -/** - * It is with great trepidation that I offer this method (privately of course). - * The gets called whenever a container gets opened. This methods job is to - * take a look at the (transient) style stack, and open any style containers that - * are there. Of course, we shouldn't bother to open styles that are incompatible - * with our parent container. 
- * - * @update gess6/4/98 - * @param tag of the container just opened - * @return 0 (for now) - */ -nsresult CNavDTD::OpenTransientStyles(eHTMLTags aChildTag){ - nsresult result=NS_OK; - - // No need to open transient styles in head context - Fix for 41427 - if((mFlags & NS_DTD_FLAG_ENABLE_RESIDUAL_STYLE) && - eHTMLTag_newline!=aChildTag && - !(mFlags & NS_DTD_FLAG_HAS_OPEN_HEAD)) { - -#ifdef ENABLE_RESIDUALSTYLE - - if(CanContain(eHTMLTag_font,aChildTag)) { - - PRUint32 theCount=mBodyContext->GetCount(); - PRUint32 theLevel=theCount; - - //this first loop is used to determine how far up the containment - //hierarchy we go looking for residual styles. - while ( 1<theLevel) { - eHTMLTags theParentTag = mBodyContext->TagAt(--theLevel); - if(gHTMLElements[theParentTag].HasSpecialProperty(kNoStyleLeaksIn)) { - break; - } - } - - mFlags &= ~NS_DTD_FLAG_ENABLE_RESIDUAL_STYLE; - for(;theLevel<theCount;++theLevel){ - nsEntryStack* theStack=mBodyContext->GetStylesAt(theLevel); - if(theStack){ - - PRInt32 sindex=0; - - nsTagEntry *theEntry=theStack->mEntries; - for(sindex=0;sindex<theStack->mCount;++sindex){ - nsCParserNode* theNode=(nsCParserNode*)theEntry->mNode; - if(1==theNode->mUseCount) { - eHTMLTags theNodeTag=(eHTMLTags)theNode->GetNodeType(); - if(gHTMLElements[theNodeTag].CanContain(aChildTag,mDTDMode)) { - theEntry->mParent = theStack; //we do this too, because this entry differs from the new one we're pushing... - if(gHTMLElements[mBodyContext->Last()].IsMemberOf(kHeading)) { - // Bug 77352 - // The style system needs to identify residual style tags - // within heading tags so that heading tags' size can take - // precedence over the residual style tags' size info.. - // *Note: Make sure that this attribute is transient since it - // should not get carried over to cases other than heading. 
- CAttributeToken theAttrToken(NS_LITERAL_STRING("_moz-rs-heading"), EmptyString()); - theNode->AddAttribute(&theAttrToken); - result = OpenContainer(theNode,theNodeTag,PR_FALSE,theStack); - theNode->PopAttributeToken(); - } - else { - result = OpenContainer(theNode,theNodeTag,PR_FALSE,theStack); - } - } - else { - //if the node tag can't contain the child tag, then remove the child tag from the style stack - nsCParserNode* node=theStack->Remove(sindex,theNodeTag); - IF_FREE(node, &mNodeAllocator); - --theEntry; //back up by one - } - } //if - ++theEntry; - } //for - } //if - } //for - mFlags |= NS_DTD_FLAG_ENABLE_RESIDUAL_STYLE; - - } //if - -#endif - }//if - return result; -} - -/** - * It is with great trepidation that I offer this method (privately of course). - * The gets called just prior when a container gets opened. This methods job is to - * take a look at the (transient) style stack, and <i>close</i> any style containers - * that are there. Of course, we shouldn't bother to open styles that are incompatible - * with our parent container. - * SEE THE TOP OF THIS FILE for more information about how the transient style stack works. - * - * @update gess6/4/98 - * @param tag of the container just opened - * @return 0 (for now) - */ -nsresult CNavDTD::CloseTransientStyles(eHTMLTags aChildTag){ - return NS_OK; -} - -/** - * This method gets called when an explicit style close-tag is encountered. - * It results in the style tag id being popped from our internal style stack. 
- * - * @update gess6/4/98 - * @param - * @return 0 if all went well (which it always does) - */ -nsresult CNavDTD::PopStyle(eHTMLTags aTag){ - nsresult result=0; - - if(mFlags & NS_DTD_FLAG_ENABLE_RESIDUAL_STYLE) { -#ifdef ENABLE_RESIDUALSTYLE - if(nsHTMLElement::IsResidualStyleTag(aTag)) { - nsCParserNode* node=mBodyContext->PopStyle(aTag); - IF_FREE(node, &mNodeAllocator); - } -#endif - } //if - return result; -} - - -/** - * This method does two things: 1st, help construct - * our own internal model of the content-stack; and - * 2nd, pass this message on to the sink. - * - * @update gess4/22/98 - * @param aNode -- next node to be added to model - */ -nsresult CNavDTD::OpenHTML(const nsCParserNode *aNode){ - NS_PRECONDITION(mBodyContext->GetCount() >= 0, kInvalidTagStackPos); - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::OpenHTML(), this=%p\n", this)); - - nsresult result = (mSink) ? mSink->OpenHTML(*aNode) : NS_OK; - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::OpenHTML(), this=%p\n", this)); - START_TIMER(); - - // Don't push more than one HTML tag into the stack... - if (mBodyContext->GetCount() == 0) - mBodyContext->Push(NS_CONST_CAST(nsCParserNode*, aNode), 0, PR_FALSE); - - return result; -} - -/** - * This method does two things: 1st, help construct - * our own internal model of the content-stack; and - * 2nd, pass this message on to the sink. - * - * @update gess4/6/98 - * @param aNode -- next node to be removed from our model - * @return TRUE if ok, FALSE if error - */ -nsresult CNavDTD::CloseHTML(){ - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::CloseHTML(), this=%p\n", this)); - - nsresult result = (mSink) ? 
mSink->CloseHTML() : NS_OK; - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::CloseHTML(), this=%p\n", this)); - START_TIMER(); - - return result; -} - - -/** - * This method does two things: 1st, help construct - * our own internal model of the content-stack; and - * 2nd, pass this message on to the sink. - * @update gess4/6/98 - * @param aNode -- next node to be added to model - * @return TRUE if ok, FALSE if error - */ -nsresult CNavDTD::OpenHead(const nsIParserNode *aNode) -{ - nsresult result = NS_OK; - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::OpenHead(), this=%p\n", this)); - - if (!(mFlags & NS_DTD_FLAG_HAS_OPEN_HEAD)) { - mFlags |= NS_DTD_FLAG_HAS_OPEN_HEAD; - result = mSink ? mSink->OpenHead(*aNode) : NS_OK; - } - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::OpenHead(), this=%p\n", this)); - START_TIMER(); - - return result; -} - -/** - * This method does two things: 1st, help construct - * our own internal model of the content-stack; and - * 2nd, pass this message on to the sink. - * @update gess4/6/98 - * @param aNode -- next node to be removed from our model - * @return TRUE if ok, FALSE if error - */ -nsresult CNavDTD::CloseHead() -{ - nsresult result = NS_OK; - - if (mFlags & NS_DTD_FLAG_HAS_OPEN_HEAD) { - mFlags &= ~NS_DTD_FLAG_HAS_OPEN_HEAD; - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::CloseHead(), this=%p\n", this)); - - result = mSink ? mSink->CloseHead() : NS_OK; - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::CloseHead(), this=%p\n", this)); - START_TIMER(); - } - - return result; -} - -/** - * This method does two things: 1st, help construct - * our own internal model of the content-stack; and - * 2nd, pass this message on to the sink. 
- * @update gess4/6/98 - * @param aNode -- next node to be added to model - * @return TRUE if ok, FALSE if error - */ -nsresult CNavDTD::OpenBody(const nsCParserNode *aNode) -{ - NS_PRECONDITION(mBodyContext->GetCount() >= 0, kInvalidTagStackPos); - - nsresult result = NS_OK; - - if (!(mFlags & NS_DTD_FLAG_HAD_FRAMESET)) { - - mFlags |= NS_DTD_FLAG_HAD_BODY; - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::OpenBody(), this=%p\n", this)); - - result = (mSink) ? mSink->OpenBody(*aNode) : NS_OK; - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::OpenBody(), this=%p\n", this)); - START_TIMER(); - - if (!HasOpenContainer(eHTMLTag_body)) { - mBodyContext->Push(NS_CONST_CAST(nsCParserNode*, aNode), 0, PR_FALSE); - mTokenizer->PrependTokens(mMisplacedContent); - } - } - - return result; -} - -/** - * This method does two things: 1st, help close - * our own internal model of the content-stack; and - * 2nd, pass this message on to the sink. - * @update gess4/6/98 - * @param aNode -- next node to be removed from our model - * @return TRUE if ok, FALSE if error - */ -nsresult CNavDTD::CloseBody() -{ - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::CloseBody(), this=%p\n", this)); - - nsresult result= (mSink) ? mSink->CloseBody() : NS_OK; - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::CloseBody(), this=%p\n", this)); - START_TIMER(); - - return result; -} - -/** - * This method does two things: 1st, help construct - * our own internal model of the content-stack; and - * 2nd, pass this message on to the sink. - * @update gess4/6/98 - * @param aNode -- next node to be added to model - * @return TRUE if ok, FALSE if error - */ -nsresult CNavDTD::OpenForm(const nsIParserNode *aNode) -{ - nsresult result = NS_OK; - if (!(mFlags & NS_DTD_FLAG_HAS_OPEN_FORM)) { // discard nested forms - bug 72639 - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::OpenForm(), this=%p\n", this)); - - result = (mSink) ? 
mSink->OpenForm(*aNode) : NS_OK; - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::OpenForm(), this=%p\n", this)); - START_TIMER(); - if (NS_OK == result) { - mFlags |= NS_DTD_FLAG_HAS_OPEN_FORM; - } - } - - return result; -} - -/** - * This method does two things: 1st, help construct - * our own internal model of the content-stack; and - * 2nd, pass this message on to the sink. - * @update gess4/6/98 - * @param aNode -- next node to be removed from our model - * @return TRUE if ok, FALSE if error - */ -nsresult CNavDTD::CloseForm() -{ - nsresult result = NS_OK; - if (mFlags & NS_DTD_FLAG_HAS_OPEN_FORM) { - mFlags &= ~NS_DTD_FLAG_HAS_OPEN_FORM; - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::CloseForm(), this=%p\n", this)); - - result = (mSink) ? mSink->CloseForm() : NS_OK; - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::CloseForm(), this=%p\n", this)); - START_TIMER(); - } - return result; -} - -/** - * This method does two things: 1st, help construct - * our own internal model of the content-stack; and - * 2nd, pass this message on to the sink. - * @update gess4/6/98 - * @param aNode -- next node to be added to model - * @return TRUE if ok, FALSE if error - */ -nsresult CNavDTD::OpenMap(const nsCParserNode *aNode) -{ - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::OpenMap(), this=%p\n", this)); - - nsresult result = (mSink) ? mSink->OpenMap(*aNode) : NS_OK; - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::OpenMap(), this=%p\n", this)); - START_TIMER(); - - if (NS_OK == result) { - mBodyContext->Push(NS_CONST_CAST(nsCParserNode*, aNode), 0, PR_FALSE); - ++mOpenMapCount; - } - return result; -} - -/** - * This method does two things: 1st, help construct - * our own internal model of the content-stack; and - * 2nd, pass this message on to the sink. 
- * @update gess4/6/98 - * @param aNode -- next node to be removed from our model - * @return TRUE if ok, FALSE if error - */ -nsresult CNavDTD::CloseMap() -{ - nsresult result = NS_OK; - if (mOpenMapCount) { - mOpenMapCount--; - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::CloseMap(), this=%p\n", this)); - - result = (mSink) ? mSink->CloseMap() : NS_OK; - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::CloseMap(), this=%p\n", this)); - START_TIMER(); - } - return result; -} - -/** - * This method does two things: 1st, help construct - * our own internal model of the content-stack; and - * 2nd, pass this message on to the sink. - * @update gess4/6/98 - * @param aNode -- next node to be added to model - * @return TRUE if ok, FALSE if error - */ -nsresult CNavDTD::OpenFrameset(const nsCParserNode *aNode) -{ - NS_PRECONDITION(mBodyContext->GetCount() >= 0, kInvalidTagStackPos); - - mFlags |= NS_DTD_FLAG_HAD_FRAMESET; - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::OpenFrameset(), this=%p\n", this)); - - nsresult result =( mSink) ? mSink->OpenFrameset(*aNode) : NS_OK; - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::OpenFrameset(), this=%p\n", this)); - START_TIMER(); - mBodyContext->Push(NS_CONST_CAST(nsCParserNode*, aNode), 0, PR_FALSE); - - return result; -} - -/** - * This method does two things: 1st, help construct - * our own internal model of the content-stack; and - * 2nd, pass this message on to the sink. - * @update gess4/6/98 - * @param aNode -- next node to be removed from our model - * @return TRUE if ok, FALSE if error - */ -nsresult CNavDTD::CloseFrameset() -{ - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::CloseFrameset(), this=%p\n", this)); - - nsresult result = (mSink) ? 
mSink->CloseFrameset() : NS_OK; - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::CloseFrameset(), this=%p\n", this)); - START_TIMER(); - - return result; -} - -/** - * This method does two things: 1st, help construct - * our own internal model of the content-stack; and - * 2nd, pass this message on to the sink. - * @update gess4/6/98 - * @param aNode -- next node to be added to model - * @param aClosedByStartTag -- ONLY TRUE if the container is being closed by opening of another container. - * @return TRUE if ok, FALSE if error - */ -nsresult -CNavDTD::OpenContainer(const nsCParserNode *aNode, - eHTMLTags aTag, - PRBool aClosedByStartTag, - nsEntryStack* aStyleStack) -{ - NS_PRECONDITION(mBodyContext->GetCount() >= 0, kInvalidTagStackPos); - - nsresult result = NS_OK; - PRBool done = PR_TRUE; - PRBool rs_tag = nsHTMLElement::IsResidualStyleTag(aTag); - - if (rs_tag) { - /*********************************************************************** - * Here's an interesting problem: - * - * If there's an <a> on the RS-stack, and you're trying to open - * another <a>, the one on the RS-stack should be discarded. - * - * I'm updating OpenTransientStyles to throw old <a>'s away. - * - ***********************************************************************/ - - OpenTransientStyles(aTag); - } - -#ifdef ENABLE_CRC - #define K_OPENOP 100 - CRCStruct theStruct(aTag,K_OPENOP); - mComputedCRC32=AccumulateCRC(mComputedCRC32,(char*)&theStruct,sizeof(theStruct)); -#endif - - switch (aTag) { - case eHTMLTag_html: - result=OpenHTML(aNode); break; - - case eHTMLTag_head: - result=OpenHead(aNode); - break; - - case eHTMLTag_body: - { - eHTMLTags theParent=mBodyContext->Last(); - if (!gHTMLElements[aTag].IsSpecialParent(theParent)) { - mFlags |= NS_DTD_FLAG_HAS_OPEN_BODY; - result = OpenBody(aNode); - } - else { - done = PR_FALSE; - } - } - break; - - case eHTMLTag_counter: //drop it on the floor. 
- break; - - case eHTMLTag_style: - case eHTMLTag_title: - break; - - case eHTMLTag_textarea: - result = AddLeaf(aNode); - break; - - case eHTMLTag_map: - result = OpenMap(aNode); - break; - - case eHTMLTag_form: - result = OpenForm(aNode); - break; - - case eHTMLTag_frameset: - result = OpenFrameset(aNode); - break; - - case eHTMLTag_script: - result = HandleScriptToken(aNode); - break; - - case eHTMLTag_noscript: - // we want to make sure that OpenContainer gets called below since we're - // not doing it here - done=PR_FALSE; - // If the script is disabled noscript should not be - // in the content model until the layout can somehow - // turn noscript's display property to block <-- bug 67899 - if(mFlags & NS_DTD_FLAG_SCRIPT_ENABLED) { - mScratch.Truncate(); - mFlags |= NS_DTD_FLAG_ALTERNATE_CONTENT; - } - break; - - case eHTMLTag_iframe: // Bug 84491 - case eHTMLTag_noframes: - done=PR_FALSE; - if(mFlags & NS_DTD_FLAG_FRAMES_ENABLED) { - mScratch.Truncate(); - mFlags |= NS_DTD_FLAG_ALTERNATE_CONTENT; - } - break; - - default: - done=PR_FALSE; - break; - } - - if (!done) { - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::OpenContainer(), this=%p\n", this)); - - result=(mSink) ? mSink->OpenContainer(*aNode) : NS_OK; - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::OpenContainer(), this=%p\n", this)); - START_TIMER(); - // For residual style tags rs_tag will be true and hence - // the body context will hold an extra reference to the node. - mBodyContext->Push(NS_CONST_CAST(nsCParserNode*, aNode), aStyleStack, rs_tag); - } - - return result; -} - -/** - * This method does two things: 1st, help construct - * our own internal model of the content-stack; and - * 2nd, pass this message on to the sink. - * @update gess4/6/98 - * @param aNode -- next node to be removed from our model - * @param aTag -- id of tag to be closed - * @param aClosedByStartTag -- ONLY TRUE if the container is being closed by opening of another container. 
- * @return TRUE if ok, FALSE if error - */ -nsresult -CNavDTD::CloseContainer(const eHTMLTags aTag, eHTMLTags aTarget,PRBool aClosedByStartTag) -{ - nsresult result = NS_OK; -#ifdef ENABLE_CRC - #define K_CLOSEOP 200 - CRCStruct theStruct(nodeType,K_CLOSEOP); - mComputedCRC32=AccumulateCRC(mComputedCRC32,(char*)&theStruct,sizeof(theStruct)); -#endif - - switch (aTag) { - - case eHTMLTag_html: - result=CloseHTML(); break; - - case eHTMLTag_style: - case eHTMLTag_textarea: - break; - - case eHTMLTag_head: - result=CloseHead(); - break; - - case eHTMLTag_body: - result=CloseBody(); - break; - - case eHTMLTag_map: - result=CloseMap(); - break; - - case eHTMLTag_form: - result=CloseForm(); - break; - - case eHTMLTag_frameset: - result=CloseFrameset(); - break; - - case eHTMLTag_iframe: - case eHTMLTag_noscript: - case eHTMLTag_noframes: - // switch from alternate content state to regular state - mFlags &= ~NS_DTD_FLAG_ALTERNATE_CONTENT; - // falling thro' intentionally.... - case eHTMLTag_title: - default: - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::CloseContainer(), this=%p\n", this)); - - result=(mSink) ? mSink->CloseContainer(aTag) : NS_OK; - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::CloseContainer(), this=%p\n", this)); - START_TIMER(); - break; - } - - return result; -} - -/** - * This method does two things: 1st, help construct - * our own internal model of the content-stack; and - * 2nd, pass this message on to the sink. 
- * @update gess4/6/98 - * @param anIndex - * @param aTag - * @param aClosedByStartTag -- if TRUE, then we're closing something because a start tag caused it - * @return TRUE if ok, FALSE if error - */ -nsresult CNavDTD::CloseContainersTo(PRInt32 anIndex,eHTMLTags aTarget, PRBool aClosedByStartTag) -{ - NS_PRECONDITION(mBodyContext->GetCount() > 0, kInvalidTagStackPos); - nsresult result = NS_OK; - - if ((anIndex<mBodyContext->GetCount()) && (anIndex>=0)) { - - PRInt32 count = 0; - while ((count = mBodyContext->GetCount()) > anIndex) { - nsEntryStack* theChildStyleStack = 0; - eHTMLTags theTag = mBodyContext->Last(); - nsCParserNode* theNode = mBodyContext->Pop(theChildStyleStack); - result = CloseContainer(theTag, aTarget,aClosedByStartTag); - -#ifdef ENABLE_RESIDUALSTYLE - - PRBool theTagIsStyle=nsHTMLElement::IsResidualStyleTag(theTag); - // If the current tag cannot leak out then we shouldn't leak out of the target - Fix 40713 - PRBool theStyleDoesntLeakOut = gHTMLElements[theTag].HasSpecialProperty(kNoStyleLeaksOut); - if(!theStyleDoesntLeakOut) { - theStyleDoesntLeakOut = gHTMLElements[aTarget].HasSpecialProperty(kNoStyleLeaksOut); - } - - /************************************************************* - Do not invoke residual style handling when dealing with - alternate content. This fixes bug 25214. - *************************************************************/ - - if(theTagIsStyle && !(mFlags & NS_DTD_FLAG_ALTERNATE_CONTENT)) { - NS_ASSERTION(theNode, "residual style node should not be null"); - if (!theNode) { - if (theChildStyleStack) - mBodyContext->PushStyles(theChildStyleStack); - return NS_OK; - } - PRBool theTargetTagIsStyle = nsHTMLElement::IsResidualStyleTag(aTarget); - if(aClosedByStartTag) { - - /*********************************************************** - Handle closure due to new start tag. - - The cases we're handing here: - 1. <body><b><DIV> //<b> gets pushed onto <body>.mStyles. - 2. 
<body><a>text<a> //in this case, the target matches, so don't push style - ***************************************************************************/ - - if (theNode->mUseCount == 0){ - if (theTag != aTarget) { - //don't push if thechild==theTarget - if (theChildStyleStack) - theChildStyleStack->PushFront(theNode); - else - mBodyContext->PushStyle(theNode); - } - } - else if (theTag == aTarget && !gHTMLElements[aTarget].CanContainSelf()) { - //here's a case we missed: <a><div>text<a>text</a></div> - //The <div> pushes the 1st <a> onto the rs-stack, then the 2nd <a> - //pops the 1st <a> from the rs-stack altogether. - nsCParserNode* node = mBodyContext->PopStyle(theTag); - IF_FREE(node, &mNodeAllocator); - } - - if (theChildStyleStack) { - mBodyContext->PushStyles(theChildStyleStack); - } - } - else { //Handle closure due to another close tag. - - /*********************************************************** - if you're here, then we're dealing with the closure of tags - caused by a close tag (as opposed to an open tag). - At a minimum, we should consider pushing residual styles up - up the stack or popping and recycling displaced nodes. - - Known cases: - 1. <body><b><div>text</DIV> - Here the <b> will leak into <div> (see case given above), and - when <div> closes the <b> is dropped since it's already residual. - - 2. <body><div><b>text</div> - Here the <b> will leak out of the <div> and get pushed onto - the RS stack for the <body>, since it originated in the <div>. - - 3. <body><span><b>text</span> - In this case, the the <b> get's pushed onto the style stack. - Later we deal with RS styles stored on the <span> - - 4. <body><span><b>text</i> - Here we the <b>is closed by a (synonymous) style tag. - In this case, the <b> is simply closed. 
- ***************************************************************************/ - - if (theChildStyleStack) { - if (!theStyleDoesntLeakOut) { - if (theTag != aTarget) { - if (theNode->mUseCount == 0) { - theChildStyleStack->PushFront(theNode); - } - } - else if (theNode->mUseCount == 1) { - // This fixes bug 30885,29626. - // Make sure that the node, which is about to - // get released does not stay on the style stack... - // Also be sure to remove the correct style off the - // style stack. - Ref. bug 94208. - // Ex <FONT><B><I></FONT><FONT></B></I></FONT> - // Make sure that </B> removes B off the style stack. - mBodyContext->RemoveStyle(theTag); - } - mBodyContext->PushStyles(theChildStyleStack); - } - else{ - IF_DELETE(theChildStyleStack,&mNodeAllocator); - } - } - else if (theNode->mUseCount == 0) { - - //The old version of this only pushed if the targettag wasn't style. - //But that misses this case: <font><b>text</font>, where the b should leak - if (aTarget != theTag) { - mBodyContext->PushStyle(theNode); - } - } - else { - //Ah, at last, the final case. If you're here, then we just popped a - //style tag that got onto that tag stack from a stylestack somewhere. - //Pop it from the stylestack if the target is also a style tag. - //Make sure to remove the matching style. In the following example - //<FONT><B><I></FONT><FONT color=red></B></I></FONT> make sure that - //</I> does not remove <FONT color=red> off the style stack. - bug 94208 - if (theTargetTagIsStyle && theTag == aTarget) { - mBodyContext->RemoveStyle(theTag); - } - } - } - } //if - else { - //the tag is not a style tag... 
- if (theChildStyleStack) { - if (theStyleDoesntLeakOut) - IF_DELETE(theChildStyleStack,&mNodeAllocator); - else - mBodyContext->PushStyles(theChildStyleStack); - } - } -#endif - IF_FREE(theNode, &mNodeAllocator); - } - - } //if - return result; -} - -/** - * This method does two things: 1st, help construct - * our own internal model of the content-stack; and - * 2nd, pass this message on to the sink. - * @update gess4/6/98 - * @param aTag -- - * @param aClosedByStartTag -- ONLY TRUE if the container is being closed by opening of another container. - * @return TRUE if ok, FALSE if error - */ -nsresult CNavDTD::CloseContainersTo(eHTMLTags aTag,PRBool aClosedByStartTag){ - NS_PRECONDITION(mBodyContext->GetCount() > 0, kInvalidTagStackPos); - - PRInt32 pos=mBodyContext->LastOf(aTag); - - if(kNotFound!=pos) { - //the tag is indeed open, so close it. - return CloseContainersTo(pos,aTag,aClosedByStartTag); - } - - eHTMLTags theTopTag=mBodyContext->Last(); - - PRBool theTagIsSynonymous=((nsHTMLElement::IsResidualStyleTag(aTag)) && (nsHTMLElement::IsResidualStyleTag(theTopTag))); - if(!theTagIsSynonymous){ - theTagIsSynonymous=(gHTMLElements[aTag].IsMemberOf(kHeading) && - gHTMLElements[theTopTag].IsMemberOf(kHeading)); - } - - if(theTagIsSynonymous) { - //if you're here, it's because we're trying to close one tag, - //but a different (synonymous) one is actually open. Because this is NAV4x - //compatibililty mode, we must close the one that's really open. - aTag=theTopTag; - pos=mBodyContext->LastOf(aTag); - if(kNotFound!=pos) { - //the tag is indeed open, so close it. - return CloseContainersTo(pos,aTag,aClosedByStartTag); - } - } - - nsresult result=NS_OK; - const TagList* theRootTags=gHTMLElements[aTag].GetRootTags(); - eHTMLTags theParentTag=(theRootTags) ? 
theRootTags->mTags[0] : eHTMLTag_unknown; - pos=mBodyContext->LastOf(theParentTag); - if(kNotFound!=pos) { - //the parent container is open, so close it instead - result=CloseContainersTo(pos+1,aTag,aClosedByStartTag); - } - return result; -} - -/** - * This method does two things: 1st, help construct - * our own internal model of the content-stack; and - * 2nd, pass this message on to the sink. - * @update gess4/6/98 - * @param aNode -- next node to be added to model - * @return error code; 0 means OK - */ -nsresult CNavDTD::AddLeaf(const nsIParserNode *aNode){ - nsresult result=NS_OK; - - if(mSink){ - eHTMLTags theTag=(eHTMLTags)aNode->GetNodeType(); - OpenTransientStyles(theTag); - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::AddLeaf(), this=%p\n", this)); - - result=mSink->AddLeaf(*aNode); - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::AddLeaf(), this=%p\n", this)); - START_TIMER(); - - } - return result; -} - -/** - * Call this method ONLY when you want to write a leaf - * into the head container. - * - * @update gess 03/14/99 - * @param aNode -- next node to be added to model - * @return error code; 0 means OK - */ -nsresult CNavDTD::AddHeadLeaf(nsIParserNode *aNode){ - nsresult result=NS_OK; - - static eHTMLTags gNoXTags[] = {eHTMLTag_noembed,eHTMLTag_noframes}; - - eHTMLTags theTag = (eHTMLTags)aNode->GetNodeType(); - - // XXX - SCRIPT inside NOTAGS should not get executed unless the pref. - // says so. Since we don't have this support yet..lets ignore the - // SCRIPT inside NOTAGS. Ref Bug 25880. 
- if (eHTMLTag_meta == theTag || eHTMLTag_script == theTag) { - if (HasOpenContainer(gNoXTags,sizeof(gNoXTags)/sizeof(eHTMLTag_unknown))) { - return result; - } - } - - if (mSink) { - if (eHTMLTag_title == theTag) { - nsAutoString title; - PRInt32 lineNo; - result = CollectSkippedContent(theTag, title, lineNo); - NS_ENSURE_SUCCESS(result, result); - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::AddHeadLeaf(), this=%p\n", this)); - - result = mSink->SetTitle(title); - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::AddHeadLeaf(), this=%p\n", this)); - START_TIMER(); - } - else { - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::AddHeadLeaf(), this=%p\n", this)); - - result = mSink->AddHeadContent(*aNode); - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::AddHeadLeaf(), this=%p\n", this)); - START_TIMER(); - } - } - return result; -} - -/** - * This method gets called to create a valid context stack - * for the given child. We compare the current stack to the - * default needs of the child, and push new guys onto the - * stack until the child can be properly placed. - * - * @update gess 4/8/98 - * @param aChild is the child for whom we need to - * create a new context vector - * @return true if we succeeded, otherwise false - */ -nsresult CNavDTD::CreateContextStackFor(eHTMLTags aChild){ - - mScratch.Truncate(); - - nsresult result=(nsresult)kContextMismatch; - eHTMLTags theTop=mBodyContext->Last(); - PRBool bResult=ForwardPropagate(mScratch,theTop,aChild); - - if(PR_FALSE==bResult){ - - if(eHTMLTag_unknown!=theTop) { - if(theTop!=aChild) //dont even bother if we're already inside a similar element... 
- bResult=BackwardPropagate(mScratch,theTop,aChild); - } //if - else bResult=BackwardPropagate(mScratch,eHTMLTag_html,aChild); - } //elseif - - PRInt32 theLen=mScratch.Length(); - eHTMLTags theTag=(eHTMLTags)mScratch[--theLen]; - - if((0==mBodyContext->GetCount()) || (mBodyContext->Last()==theTag)) - result=NS_OK; - - //now, build up the stack according to the tags - //you have that aren't in the stack... - if(PR_TRUE==bResult){ - while(theLen) { - theTag=(eHTMLTags)mScratch[--theLen]; - -#ifdef ALLOW_TR_AS_CHILD_OF_TABLE - if((eHTML3_Quirks==mDocType) && (eHTMLTag_tbody==theTag)) { - //the prev. condition prevents us from emitting tbody in html3.2 docs; fix bug 30378 - continue; - } -#endif - CStartToken *theToken=(CStartToken*)mTokenAllocator->CreateTokenOfType(eToken_start,theTag); - HandleToken(theToken,mParser); //these should all wind up on contextstack, so don't recycle. - } - result=NS_OK; - } - return result; -} - -/** - * - * @update gess5/18/98 - * @param - * @return - */ -nsresult CNavDTD::WillResumeParse(nsIContentSink* aSink){ - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::WillResumeParse(), this=%p\n", this)); - - nsresult result=(aSink) ? aSink->WillResume() : NS_OK; - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::WillResumeParse(), this=%p\n", this)); - START_TIMER(); - - return result; -} - -/** - * This method gets called when the parsing process is interrupted - * due to lack of data (waiting for netlib). - * @update gess5/18/98 - * @return error code - */ -nsresult CNavDTD::WillInterruptParse(nsIContentSink* aSink){ - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::WillInterruptParse(), this=%p\n", this)); - - nsresult result=(aSink) ? 
aSink->WillInterrupt() : NS_OK; - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::WillInterruptParse(), this=%p\n", this)); - START_TIMER(); - - return result; -} - diff --git a/htmlparser/src/CNavDTD.h b/htmlparser/src/CNavDTD.h deleted file mode 100644 index 04f646bc8167..000000000000 --- a/htmlparser/src/CNavDTD.h +++ /dev/null @@ -1,417 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. 
If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -/** - * MODULE NOTES: - * @update gess 7/15/98 - * - * NavDTD is an implementation of the nsIDTD interface. - * In particular, this class captures the behaviors of the original - * Navigator parser productions. - * - * This DTD, like any other in NGLayout, provides a few basic services: - * - First, the DTD collaborates with the Parser class to convert plain - * text into a sequence of HTMLTokens. - * - Second, the DTD describes containment rules for known elements. - * - Third the DTD controls and coordinates the interaction between the - * parsing system and content sink. (The content sink is the interface - * that serves as a proxy for content model). - * - Fourth the DTD maintains an internal style-stack to handle residual (leaky) - * style tags. - * - * You're most likely working in this class file because - * you want to add or change a behavior inherent in this DTD. The remainder - * of this section will describe what you need to do to affect the kind of - * change you want in this DTD. - * - * RESIDUAL-STYLE HANDLNG: - * There are a number of ways to represent style in an HTML document. - * 1) explicit style tags (<B>, <I> etc) - * 2) implicit styles (like those implicit in <Hn>) - * 3) CSS based styles - * - * Residual style handling results from explicit style tags that are - * not closed. Consider this example: <p>text <b>bold </p> - * When the <p> tag closes, the <b> tag is NOT automatically closed. - * Unclosed style tags are handled by the process we call residual-style - * tag handling. - * - * There are two aspects to residual style tag handling. The first is the - * construction and managing of a stack of residual style tags. 
The - * second is the automatic emission of residual style tags onto leaf content - * in subsequent portions of the document.This step is necessary to propagate - * the expected style behavior to subsequent portions of the document. - * - * Construction and managing the residual style stack is an inline process that - * occurs during the model building phase of the parse process. During the model- - * building phase of the parse process, a content stack is maintained which tracks - * the open container hierarchy. If a style tag(s) fails to be closed when a normal - * container is closed, that style tag is placed onto the residual style stack. If - * that style tag is subsequently closed (in most contexts), it is popped off the - * residual style stack -- and are of no further concern. - * - * Residual style tag emission occurs when the style stack is not empty, and leaf - * content occurs. In our earlier example, the <b> tag "leaked" out of the <p> - * container. Just before the next leaf is emitted (in this or another container) the - * style tags that are on the stack are emitted in succession. These same residual - * style tags get closed automatically when the leaf's container closes, or if a - * child container is opened. - * - * - */ -#ifndef NS_NAVHTMLDTD__ -#define NS_NAVHTMLDTD__ - -#include "nsIDTD.h" -#include "nsISupports.h" -#include "nsIParser.h" -#include "nsHTMLTags.h" -#include "nsVoidArray.h" -#include "nsDeque.h" -#include "nsParserCIID.h" -#include "nsTime.h" -#include "nsDTDUtils.h" - -#define NS_INAVHTML_DTD_IID \ - {0x5c5cce40, 0xcfd6, 0x11d1, \ - {0xaa, 0xda, 0x00, 0x80, 0x5f, 0x8a, 0x3e, 0x14}} - - -class nsIHTMLContentSink; -class nsIParserNode; -class nsParser; -class nsDTDContext; -class nsEntryStack; -class nsITokenizer; -class nsCParserNode; -class nsTokenAllocator; - -/*************************************************************** - Now the main event: CNavDTD. 
- - This not so simple class performs all the duties of token - construction and model building. It works in conjunction with - an nsParser. - ***************************************************************/ - -#ifdef _MSC_VER -#pragma warning( disable : 4275 ) -#endif - -class CNavDTD : public nsIDTD -{ - -#ifdef _MSC_VER -#pragma warning( default : 4275 ) -#endif - -public: - NS_DECL_ISUPPORTS - NS_DECL_NSIDTD - - /** - * Common constructor for navdtd. You probably want to call - * NS_NewNavHTMLDTD(). - * - * @update gess 7/9/98 - */ - CNavDTD(); - virtual ~CNavDTD(); - - - /** - * This method is called to determine whether or not a tag - * of one type can contain a tag of another type. - * - * @update gess 3/25/98 - * @param aParent -- int tag of parent container - * @param aChild -- int tag of child container - * @return PR_TRUE if parent can contain child - */ - virtual PRBool CanPropagate(eHTMLTags aParent, - eHTMLTags aChild, - PRBool aParentContains) ; - - /** - * This method gets called to determine whether a given - * child tag can be omitted by the given parent. - * - * @update gess 3/25/98 - * @param aParent -- parent tag being asked about omitting given child - * @param aChild -- child tag being tested for omittability by parent - * @param aParentContains -- can be 0,1,-1 (false,true, unknown) - * @return PR_TRUE if given tag can be omitted - */ - virtual PRBool CanOmit(eHTMLTags aParent, - eHTMLTags aChild, - PRBool& aParentContains); - - /** - * This method tries to design a context map (without actually - * changing our parser state) from the parent down to the - * child. 
- * - * @update gess4/6/98 - * @param aParent -- tag type of parent - * @param aChild -- tag type of child - * @return True if closure was achieved -- other false - */ - virtual PRBool ForwardPropagate(nsString& aSequence, - eHTMLTags aParent, - eHTMLTags aChild); - - /** - * This method tries to design a context map (without actually - * changing our parser state) from the child up to the parent. - * - * @update gess4/6/98 - * @param aParent -- tag type of parent - * @param aChild -- tag type of child - * @return True if closure was achieved -- other false - */ - virtual PRBool BackwardPropagate(nsString& aSequence, - eHTMLTags aParent, - eHTMLTags aChild) const; - - /** - * Attempt forward and/or backward propagation for the given - * child within the current context vector stack. - * @update gess5/11/98 - * @param aChild -- type of child to be propagated. - * @return TRUE if succeeds, otherwise FALSE - */ - nsresult CreateContextStackFor(eHTMLTags aChild); - - /** - * Ask parser if a given container is open ANYWHERE on stack - * @update gess5/11/98 - * @param id of container you want to test for - * @return TRUE if the given container type is open -- otherwise FALSE - */ - virtual PRBool HasOpenContainer(eHTMLTags aContainer) const; - - /** - * Ask parser if a given container is open ANYWHERE on stack - * @update gess5/11/98 - * @param id of container you want to test for - * @return TRUE if the given container type is open -- otherwise FALSE - */ - virtual PRBool HasOpenContainer(const eHTMLTags aTagSet[],PRInt32 aCount) const; - - /** - * Accessor that retrieves the tag type of the topmost item on context - * vector stack. - * - * @update gess5/11/98 - * @return tag type (may be unknown) - */ - virtual eHTMLTags GetTopNode() const; - - /** - * Finds the topmost occurance of given tag within context vector stack. - * @update gess5/11/98 - * @param tag to be found - * @return index of topmost tag occurance -- may be -1 (kNotFound). 
- */ - // virtual PRInt32 GetTopmostIndexOf(eHTMLTags aTag) const; - - /** - * Finds the topmost occurance of given tag within context vector stack. - * @update gess5/11/98 - * @param tag to be found - * @return index of topmost tag occurance -- may be -1 (kNotFound). - */ - virtual PRInt32 LastOf(eHTMLTags aTagSet[],PRInt32 aCount) const; - - /** - * The following set of methods are used to partially construct - * the content model (via the sink) according to the type of token. - * @update gess5/11/98 - * @param aToken is the token (of a given type) to be handled - * @return error code representing construction state; usually 0. - */ - nsresult HandleStartToken(CToken* aToken); - nsresult HandleDefaultStartToken(CToken* aToken, eHTMLTags aChildTag, - nsCParserNode *aNode); - nsresult HandleEndToken(CToken* aToken); - nsresult HandleEntityToken(CToken* aToken); - nsresult HandleCommentToken(CToken* aToken); - nsresult HandleAttributeToken(CToken* aToken); - nsresult HandleScriptToken(const nsIParserNode *aNode); - nsresult HandleProcessingInstructionToken(CToken* aToken); - nsresult HandleDocTypeDeclToken(CToken* aToken); - nsresult BuildNeglectedTarget(eHTMLTags aTarget, eHTMLTokenTypes aType, - nsIParser* aParser, nsIContentSink* aSink); - - //************************************************* - //these cover methods mimic the sink, and are used - //by the parser to manage its context-stack. - //************************************************* - - /** - * The next set of method open given HTML elements of - * various types. - * - * @update gess5/11/98 - * @param node to be opened in content sink. - * @return error code representing error condition-- usually 0. 
- */ - nsresult OpenHTML(const nsCParserNode *aNode); - nsresult OpenHead(const nsIParserNode *aNode); - nsresult OpenBody(const nsCParserNode *aNode); - nsresult OpenForm(const nsIParserNode *aNode); - nsresult OpenMap(const nsCParserNode *aNode); - nsresult OpenFrameset(const nsCParserNode *aNode); - nsresult OpenContainer(const nsCParserNode *aNode, - eHTMLTags aTag, - PRBool aClosedByStartTag, - nsEntryStack* aStyleStack=0); - - /** - * The next set of methods close the given HTML element. - * - * @update gess5/11/98 - * @param HTML (node) to be opened in content sink. - * @return error code - 0 if all went well. - */ - nsresult CloseHTML(); - nsresult CloseHead(); - nsresult CloseBody(); - nsresult CloseForm(); - nsresult CloseMap(); - nsresult CloseFrameset(); - - /** - * The special purpose methods automatically close - * one or more open containers. - * @update gess5/11/98 - * @return error code - 0 if all went well. - */ - nsresult CloseContainer(const eHTMLTags aTag, - eHTMLTags aTarget, - PRBool aClosedByStartTag); - nsresult CloseContainersTo(eHTMLTags aTag, - PRBool aClosedByStartTag); - nsresult CloseContainersTo(PRInt32 anIndex, - eHTMLTags aTag, - PRBool aClosedByStartTag); - - /** - * Causes leaf to be added to sink at current vector pos. - * @update gess5/11/98 - * @param aNode is leaf node to be added. - * @return error code - 0 if all went well. - */ - nsresult AddLeaf(const nsIParserNode *aNode); - nsresult AddHeadLeaf(nsIParserNode *aNode); - - /** - * This set of methods is used to create and manage the set of - * transient styles that occur as a result of poorly formed HTML - * or bugs in the original navigator. - * - * @update gess5/11/98 - * @param aTag -- represents the transient style tag to be handled. 
- * @return error code -- usually 0 - */ - nsresult OpenTransientStyles(eHTMLTags aChildTag); - nsresult CloseTransientStyles(eHTMLTags aChildTag); - nsresult PopStyle(eHTMLTags aTag); - - nsresult PushIntoMisplacedStack(CToken* aToken) - { - NS_ENSURE_ARG_POINTER(aToken); - aToken->SetNewlineCount(0); // Note: We have already counted the newlines for these tokens - - mMisplacedContent.Push(aToken); - return NS_OK; - } - -protected: - - nsresult CollectAttributes(nsIParserNode& aNode,eHTMLTags aTag,PRInt32 aCount); - nsresult CollectSkippedContent(nsIParserNode& aNode,PRInt32& aCount); - nsresult WillHandleStartTag(CToken* aToken,eHTMLTags aChildTag,nsIParserNode& aNode); - nsresult DidHandleStartTag(nsIParserNode& aNode,eHTMLTags aChildTag); - nsresult HandleOmittedTag(CToken* aToken,eHTMLTags aChildTag,eHTMLTags aParent,nsIParserNode *aNode); - nsresult HandleSavedTokens(PRInt32 anIndex); - nsresult HandleKeyGen(nsIParserNode *aNode); - - nsDeque mMisplacedContent; - nsDeque mSkippedContent; - - nsIHTMLContentSink* mSink; - nsTokenAllocator* mTokenAllocator; - nsDTDContext* mBodyContext; - nsDTDContext* mTempContext; - nsParser* mParser; - nsITokenizer* mTokenizer; // weak - - nsString mFilename; - nsString mScratch; //used for various purposes; non-persistent - nsCString mMimeType; - - nsNodeAllocator mNodeAllocator; - nsDTDMode mDTDMode; - eParserDocType mDocType; - eParserCommands mParserCommand; //tells us to viewcontent/viewsource/viewerrors... 
- - eHTMLTags mSkipTarget; - PRInt32 mLineNumber; - PRInt32 mOpenMapCount; - - PRUint16 mFlags; - -#ifdef ENABLE_CRC - PRUint32 mComputedCRC32; - PRUint32 mExpectedCRC32; -#endif -}; - -inline nsresult NS_NewNavHTMLDTD(nsIDTD** aInstancePtrResult) -{ - NS_DEFINE_CID(kNavDTDCID, NS_CNAVDTD_CID); - return nsComponentManager::CreateInstance(kNavDTDCID, - nsnull, - NS_GET_IID(nsIDTD), - (void**)aInstancePtrResult); -} - -#endif - - - diff --git a/htmlparser/src/COtherDTD.cpp b/htmlparser/src/COtherDTD.cpp deleted file mode 100644 index 78743dc5d42a..000000000000 --- a/htmlparser/src/COtherDTD.cpp +++ /dev/null @@ -1,1016 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * rickg@netscape.com - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -//#define ENABLE_CRC - -#include "nsDebug.h" -#include "nsIAtom.h" -#include "COtherDTD.h" -#include "nsHTMLTokens.h" -#include "nsCRT.h" -#include "nsParser.h" -#include "nsIParser.h" -#include "nsIHTMLContentSink.h" -#include "nsScanner.h" -#include "prenv.h" //this is here for debug reasons... -#include "prtypes.h" //this is here for debug reasons... -#include "prio.h" -#include "plstr.h" -#include "nsDTDUtils.h" -#include "nsHTMLTokenizer.h" -#include "nsTime.h" -#include "nsParserNode.h" -#include "nsHTMLEntities.h" -#include "nsLinebreakConverter.h" -#include "nsUnicharUtils.h" - -#include "prmem.h" - -static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID); -static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); -static NS_DEFINE_IID(kIDTDIID, NS_IDTD_IID); -static NS_DEFINE_IID(kClassIID, NS_IOTHERHTML_DTD_IID); -static const char kVerificationDir[] = "c:/temp"; - - -#ifdef ENABLE_CRC -static char gShowCRC; -#endif - - - -#ifdef MOZ_PERF_METRICS -# define START_TIMER() \ - if(mParser) MOZ_TIMER_START(mParser->mParseTime); \ - if(mParser) MOZ_TIMER_START(mParser->mDTDTime); - -# define STOP_TIMER() \ - if(mParser) MOZ_TIMER_STOP(mParser->mParseTime); \ - if(mParser) MOZ_TIMER_STOP(mParser->mDTDTime); -#else -# define STOP_TIMER() -# define START_TIMER() -#endif - - -#include "COtherElements.h" - - -/************************************************************************ - And now for the main class -- 
COtherDTD... - ************************************************************************/ - -/** - * This method gets called as part of our COM-like interfaces. - * Its purpose is to create an interface to parser object - * of some type. - * - * @update gess 4/8/98 - * @param nsIID id of object to discover - * @param aInstancePtr ptr to newly discovered interface - * @return NS_xxx result code - */ -nsresult COtherDTD::QueryInterface(const nsIID& aIID, void** aInstancePtr) -{ - if (NULL == aInstancePtr) { - return NS_ERROR_NULL_POINTER; - } - - if(aIID.Equals(kISupportsIID)) { //do IUnknown... - *aInstancePtr = (nsIDTD*)(this); - } - else if(aIID.Equals(kIDTDIID)) { //do IParser base class... - *aInstancePtr = (nsIDTD*)(this); - } - else if(aIID.Equals(kClassIID)) { //do this class... - *aInstancePtr = (COtherDTD*)(this); - } - else { - *aInstancePtr=0; - return NS_NOINTERFACE; - } - NS_ADDREF_THIS(); - return NS_OK; -} - -NS_IMPL_ADDREF(COtherDTD) -NS_IMPL_RELEASE(COtherDTD) - -/** - * Default constructor - * - * @update gess 4/9/98 - * @param - * @return - */ -COtherDTD::COtherDTD() : nsIDTD() { - mSink = 0; - mParser=0; - mLineNumber=1; - mHasOpenBody=PR_FALSE; - mHasOpenHead=0; - mHasOpenForm=PR_FALSE; - mHasOpenMap=PR_FALSE; - mTokenizer=0; - mTokenAllocator=0; - mComputedCRC32=0; - mExpectedCRC32=0; - mDTDState=NS_OK; - mDocType=eHTML_Strict; - mHadFrameset=PR_FALSE; - mHadBody=PR_FALSE; - mHasOpenScript=PR_FALSE; - mParserCommand=eViewNormal; - mNodeAllocator=new nsNodeAllocator(); - mBodyContext=new nsDTDContext(); - -#if 0 //set this to 1 if you want strictDTD to be based on the environment setting. 
- char* theEnvString = PR_GetEnv("MOZ_DISABLE_STRICT"); - mEnableStrict=PRBool(0==theEnvString); -#else - mEnableStrict=PR_TRUE; -#endif - - if(!gElementTable) { - gElementTable = new CElementTable(); - } -} - -/** - * - * @update gess1/8/99 - * @param - * @return - */ -const nsIID& COtherDTD::GetMostDerivedIID(void)const { - return kClassIID; -} - -/** - * Default destructor - * - * @update gess 4/9/98 - * @param - * @return - */ -COtherDTD::~COtherDTD(){ - delete mBodyContext; - - if(mNodeAllocator) { - delete mNodeAllocator; - mNodeAllocator=nsnull; - } - - NS_IF_RELEASE(mSink); -} - -/** - * This method is defined in nsIParser. It is used to - * cause the COM-like construction of an nsParser. - * - * @update gess 4/8/98 - * @param nsIParser** ptr to newly instantiated parser - * @return NS_xxx error result - */ -nsresult NS_NewOtherHTMLDTD(nsIDTD** aInstancePtrResult) { - COtherDTD* it = new COtherDTD(); - - if (it == 0) { - return NS_ERROR_OUT_OF_MEMORY; - } - - return it->QueryInterface(kClassIID, (void **) aInstancePtrResult); -} - -/** - * Call this method if you want the DTD to construct a fresh - * instance of itself. - * @update gess7/23/98 - * @param - * @return - */ -NS_IMETHODIMP -COtherDTD::CreateNewInstance(nsIDTD** aInstancePtrResult) -{ - nsresult result=NS_NewOtherHTMLDTD(aInstancePtrResult); - - if(aInstancePtrResult) { - COtherDTD *theOtherDTD=(COtherDTD*)*aInstancePtrResult; - if(theOtherDTD) { - theOtherDTD->mDTDMode=mDTDMode; - theOtherDTD->mParserCommand=mParserCommand; - theOtherDTD->mDocType=mDocType; - theOtherDTD->mEnableStrict=mEnableStrict; - } - } - - return result; -} - -/** - * This method is called to determine if the given DTD can parse - * a document in a given source-type. - * NOTE: Parsing always assumes that the end result will involve - * storing the result in the main content model. - * @update gess6/24/98 - * @param - * @return TRUE if this DTD can satisfy the request; FALSE otherwise. 
- */ -NS_IMETHODIMP_(eAutoDetectResult) -COtherDTD::CanParse(CParserContext& aParserContext, const nsString& aBuffer, - PRInt32 aVersion) -{ - eAutoDetectResult result=eUnknownDetect; - - if(mEnableStrict) { - if(aParserContext.mParserCommand != eViewSource) { - if(PR_TRUE==aParserContext.mMimeType.Equals(kPlainTextContentType)) { - result=eValidDetect; - } - else if(PR_TRUE==aParserContext.mMimeType.Equals(kHTMLTextContentType)) { - switch(aParserContext.mDTDMode) { - case eDTDMode_full_standards: - case eDTDMode_almost_standards: - result=ePrimaryDetect; - break; - default: - result=eValidDetect; - break; - } - } - else { - //otherwise, look into the buffer to see if you recognize anything... - PRBool theBufHasXML=PR_FALSE; - if(BufferContainsHTML(aBuffer,theBufHasXML)){ - result = eValidDetect ; - if(0==aParserContext.mMimeType.Length()) { - aParserContext.SetMimeType(NS_LITERAL_CSTRING(kHTMLTextContentType)); - if(!theBufHasXML) { - switch(aParserContext.mDTDMode) { - case eDTDMode_full_standards: - case eDTDMode_almost_standards: - result=ePrimaryDetect; - break; - default: - result=eValidDetect; - break; - } - } - else result=eValidDetect; - } - } - } - } - } - return result; -} - - -/** - * The parser uses a code sandwich to wrap the parsing process. Before - * the process begins, WillBuildModel() is called. Afterwards the parser - * calls DidBuildModel(). 
- * @update rickg 03.20.2000 - * @param aParserContext - * @param aSink - * @return error code (almost always 0) - */ -nsresult COtherDTD::WillBuildModel(const CParserContext& aParserContext, - nsITokenizer* aTokenizer, - nsIContentSink* aSink){ - nsresult result=NS_OK; - - mFilename=aParserContext.mScanner->GetFilename(); - mHasOpenBody=PR_FALSE; - mHadFrameset=PR_FALSE; - mLineNumber=1; - mHasOpenScript=PR_FALSE; - mDTDMode=aParserContext.mDTDMode; - mParserCommand=aParserContext.mParserCommand; - mTokenizer = aTokenizer; - - if((!aParserContext.mPrevContext) && (aSink)) { - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: COtherDTD::WillBuildModel(), this=%p\n", this)); - - mDocType=aParserContext.mDocType; - mBodyContext->mFlags.mTransitional=PR_FALSE; - - if(aSink && (!mSink)) { - result=aSink->QueryInterface(kIHTMLContentSinkIID, (void **)&mSink); - } - - if(result==NS_OK) { - result = aSink->WillBuildModel(); - -#ifdef DEBUG - mBodyContext->ResetCounters(); -#endif - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: COtherDTD::WillBuildModel(), this=%p\n", this)); - START_TIMER(); - - mSkipTarget=eHTMLTag_unknown; - mComputedCRC32=0; - mExpectedCRC32=0; - } - } - - return result; -} - - -/** - * This is called when it's time to read as many tokens from the tokenizer - * as you can. Not all tokens may make sense, so you may not be able to - * read them all (until more come in later). 
- * - * @update gess5/18/98 - * @param aParser is the parser object that's driving this process - * @return error code (almost always NS_OK) - */ -nsresult COtherDTD::BuildModel(nsIParser* aParser,nsITokenizer* aTokenizer,nsITokenObserver* anObserver,nsIContentSink* aSink) { - nsresult result=NS_OK; - - if(aTokenizer) { - nsITokenizer* oldTokenizer=mTokenizer; - mTokenizer=aTokenizer; - mParser=(nsParser*)aParser; - - if(mTokenizer) { - - mTokenAllocator=mTokenizer->GetTokenAllocator(); - - mBodyContext->SetTokenAllocator(mTokenAllocator); - mBodyContext->SetNodeAllocator(mNodeAllocator); - - if(mSink) { - - if(!mBodyContext->GetCount()) { - //if the content model is empty, then begin by opening <html>... - CStartToken *theToken=(CStartToken*)mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_html,NS_LITERAL_STRING("html")); - HandleStartToken(theToken); //this token should get pushed on the context stack, don't recycle it. - } - - while(NS_SUCCEEDED(result)){ - - if(mDTDState!=NS_ERROR_HTMLPARSER_STOPPARSING) { - CToken* theToken=mTokenizer->PopToken(); - if(theToken) { - result=HandleToken(theToken,aParser); - } - else break; - } - else { - result=mDTDState; - break; - } - }//while - mTokenizer=oldTokenizer; - } - } - } - else result=NS_ERROR_HTMLPARSER_BADTOKENIZER; - return result; -} - -/** - * - * @update gess5/18/98 - * @param - * @return - */ -nsresult COtherDTD::DidBuildModel(nsresult anErrorCode,PRBool aNotifySink,nsIParser* aParser,nsIContentSink* aSink){ - nsresult result=NS_OK; - - if(aSink) { - - if(aParser && (NS_OK==result)){ - if(aNotifySink){ - if((NS_OK==anErrorCode) && (mBodyContext->GetCount()>0)) { - - PRInt32 theIndex=mBodyContext->GetCount()-1; - eHTMLTags theChild = mBodyContext->TagAt(theIndex); - while (theIndex>0) { - eHTMLTags theParent = mBodyContext->TagAt(--theIndex); - CElement *theElement = gElementTable->mElements[theParent]; - nsCParserNode *theNode = mBodyContext->PeekNode(); - 
theElement->HandleEndToken(theNode,theChild,mBodyContext,mSink); - theChild = theParent; - } - - nsEntryStack* theChildStyles = 0; - nsCParserNode* theNode = (nsCParserNode*)mBodyContext->Pop(theChildStyles); - if (theNode) { - mSink->CloseHTML(); - } - NS_ASSERTION(!theChildStyles, "there should no residual style information in this dtd"); - IF_DELETE(theChildStyles, mNodeAllocator); - } - else { - //If you're here, then an error occured, but we still have nodes on the stack. - //At a minimum, we should grab the nodes and recycle them. - //Just to be correct, we'll also recycle the nodes. - - while (mBodyContext->GetCount() > 0) { - - nsEntryStack *theChildStyles = 0; - nsCParserNode* theNode = (nsCParserNode*)mBodyContext->Pop(theChildStyles); - if (theNode) { - theNode->mUseCount = 0; - if (theChildStyles) { - delete theChildStyles; - } - IF_FREE(theNode, mNodeAllocator); - } - NS_ASSERTION(!theChildStyles, "there should no residual style information in this dtd"); - IF_DELETE(theChildStyles, mNodeAllocator); - } - } - - } - } //if aparser - - //No matter what, you need to call did build model. - result = aSink->DidBuildModel(); - - } //if asink - return result; -} - -NS_IMETHODIMP_(void) -COtherDTD::Terminate() -{ - mDTDState = NS_ERROR_HTMLPARSER_STOPPARSING; -} - -NS_IMETHODIMP_(PRInt32) -COtherDTD::GetType() -{ - return NS_IPARSER_FLAG_HTML; -} - -NS_IMETHODIMP -COtherDTD::CollectSkippedContent(PRInt32 aTag, nsAString& aContent, PRInt32 &aLineNo) -{ - return NS_OK; -} - -/** - * This big dispatch method is used to route token handler calls to the right place. - * What's wrong with it? This table, and the dispatch methods themselves need to be - * moved over to the delegate. Ah, so much to do... 
- * - * @update gess 12/1/99 - * @param aToken - * @param aParser - * @return - */ -nsresult COtherDTD::HandleToken(CToken* aToken,nsIParser* aParser){ - nsresult result=NS_OK; - - if(aToken) { - CHTMLToken* theToken= (CHTMLToken*)(aToken); - eHTMLTokenTypes theType=eHTMLTokenTypes(theToken->GetTokenType()); - -// theToken->mUseCount=0; //assume every token coming into this system needs recycling. - - mParser=(nsParser*)aParser; - - switch(theType) { - case eToken_text: - case eToken_start: - case eToken_whitespace: - case eToken_newline: - case eToken_doctypeDecl: - case eToken_markupDecl: - result=HandleStartToken(theToken); break; - - case eToken_entity: - result=HandleEntityToken(theToken); break; - - case eToken_end: - result=HandleEndToken(theToken); break; - - default: - break; - }//switch - - - if(NS_SUCCEEDED(result) || (NS_ERROR_HTMLPARSER_BLOCK==result)) { - IF_FREE(theToken, mTokenAllocator); - } - else if(result==NS_ERROR_HTMLPARSER_STOPPARSING) - mDTDState=result; - else return NS_OK; - - }//if - return result; -} - - -/** - * This gets called after we've handled a given start tag. - * It's a generic hook to let us to post processing. - * @param aToken contains the tag in question - * @param aTag is the tag itself. - * @return status - */ -nsresult COtherDTD::DidHandleStartTag(nsIParserNode& aNode,eHTMLTags aChildTag){ - nsresult result=NS_OK; - - switch(aChildTag){ - - case eHTMLTag_script: - mHasOpenScript=PR_TRUE; - break; - - case eHTMLTag_pre: - case eHTMLTag_listing: - { - CToken* theNextToken=mTokenizer->PeekToken(); - if(theNextToken) { - eHTMLTokenTypes theType=eHTMLTokenTypes(theNextToken->GetTokenType()); - if(eToken_newline==theType){ - ++mLineNumber; - mTokenizer->PopToken(); //skip 1st newline inside PRE and LISTING - }//if - }//if - } - break; - -#ifdef DEBUG - case eHTMLTag_meta: - { - //we should only enable user-defined entities in debug builds... 
- - PRInt32 theCount=aNode.GetAttributeCount(); - const nsAString* theNamePtr=0; - const nsAString* theValuePtr=0; - - if(theCount) { - PRInt32 theIndex=0; - for(theIndex=0;theIndex<theCount;++theIndex){ - const nsAString& theKey = aNode.GetKeyAt(theIndex); - if(theKey.Equals(NS_LITERAL_STRING("ENTITY"), nsCaseInsensitiveStringComparator())) { - const nsAString& theName=aNode.GetValueAt(theIndex); - theNamePtr=&theName; - } - else if(theKey.Equals(NS_LITERAL_STRING("VALUE"), nsCaseInsensitiveStringComparator())) { - //store the named enity with the context... - const nsAString& theValue=aNode.GetValueAt(theIndex); - theValuePtr=&theValue; - } - } - } - if(theNamePtr && theValuePtr) { - mBodyContext->RegisterEntity(*theNamePtr,*theValuePtr); - } - } - break; -#endif - - default: - break; - }//switch - - return result; -} - -/** - * This gets called before we've handled a given start tag. - * It's a generic hook to let us do pre processing. - * @param aToken contains the tag in question - * @param aChildTag is the tag itself. - * @param aNode is the node (tag) with associated attributes. - * @return TRUE if tag processing should continue; FALSE if the tag has been handled. - */ -nsresult COtherDTD::WillHandleStartTag(CToken* aToken,eHTMLTags aTag,nsIParserNode& aNode){ - nsresult result=NS_OK; - - //first let's see if there's some skipped content to deal with... 
-#if 0 - PRInt32 theAttrCount = aNode.GetAttributeCount(); - if(*gElementTable->mElements[aTag].mSkipTarget) { - result=CollectSkippedContent(aNode,theAttrCount); - } -#endif - - STOP_TIMER() - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: COtherDTD::WillHandleStartTag(), this=%p\n", this)); - - if(mParser) { - - switch(aTag) { - case eHTMLTag_newline: - ++mLineNumber; - break; - default: - break; - } - mSink->NotifyTagObservers(&aNode); - } - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: COtherDTD::WillHandleStartTag(), this=%p\n", this)); - START_TIMER() - - return result; -} - - -/** - * This method gets called when a start token has been - * encountered in the parse process. If the current container - * can contain this tag, then add it. Otherwise, you have - * two choices: 1) create an implicit container for this tag - * to be stored in - * 2) close the top container, and add this to - * whatever container ends up on top. - * - * @update gess 1/04/99 - * @param aToken -- next (start) token to be handled - * @param aNode -- CParserNode representing this start token - * @return PR_TRUE if all went well; PR_FALSE if error occured - */ -nsresult COtherDTD::HandleStartToken(CToken* aToken) { - - //Begin by gathering up attributes... - - nsresult result=NS_OK; - nsCParserNode* theNode=mNodeAllocator->CreateNode(aToken, mTokenAllocator); - if(theNode) { - - eHTMLTags theChildTag=(eHTMLTags)aToken->GetTypeID(); - PRInt16 attrCount=aToken->GetAttributeCount(); - eHTMLTags theParent=mBodyContext->Last(); - - result=(0==attrCount) ? 
NS_OK : CollectAttributes(*theNode,theChildTag,attrCount); - - if(NS_OK==result) { - result=WillHandleStartTag(aToken,theChildTag,*theNode); - if(NS_OK==result) { - - mLineNumber += aToken->GetNewlineCount(); - - PRBool theTagWasHandled=PR_FALSE; - - switch(theChildTag) { - - case eHTMLTag_html: - if(!mBodyContext->HasOpenContainer(theChildTag)){ - mSink->OpenHTML(*theNode); - mBodyContext->Push(theNode, 0, PR_FALSE); - } - theTagWasHandled=PR_TRUE; - break; - - default: - CElement* theElement=gElementTable->mElements[theParent]; - if(theElement) { - result=theElement->HandleStartToken(theNode,theChildTag,mBodyContext,mSink); - theTagWasHandled=PR_TRUE; - } - break; - }//switch - - if(theTagWasHandled) { - DidHandleStartTag(*theNode,theChildTag); - } - - } //if - }//if - IF_FREE(theNode, mNodeAllocator); - } - else result=NS_ERROR_OUT_OF_MEMORY; - - return result; -} - -/** - * This method gets called when an end token has been - * encountered in the parse process. If the end tag matches - * the start tag on the stack, then simply close it. Otherwise, - * we have a erroneous state condition. This can be because we - * have a close tag with no prior open tag (user error) or because - * we screwed something up in the parse process. I'm not sure - * yet how to tell the difference. 
- * - * @update gess 3/25/98 - * @param aToken -- next (start) token to be handled - * @return PR_TRUE if all went well; PR_FALSE if error occured - */ -nsresult COtherDTD::HandleEndToken(CToken* aToken) { - nsresult result=NS_OK; - eHTMLTags theChildTag=(eHTMLTags)aToken->GetTypeID(); - - switch(theChildTag) { - - case eHTMLTag_body: //we intentionally don't let the user close HTML or BODY - case eHTMLTag_html: - break; - - case eHTMLTag_script: - mHasOpenScript=PR_FALSE; - - default: - PRInt32 theCount=mBodyContext->GetCount(); - eHTMLTags theParent=mBodyContext->TagAt(theCount-1); - if(theChildTag==theParent) { - theParent=mBodyContext->TagAt(theCount-2); - } - CElement* theElement=gElementTable->mElements[theParent]; - if(theElement) { - nsCParserNode* theNode=mNodeAllocator->CreateNode(aToken, mTokenAllocator); - if(theNode) { - result=theElement->HandleEndToken(theNode,theChildTag,mBodyContext,mSink); - IF_FREE(theNode, mNodeAllocator); - } - } - break; - } - - return result; -} - -/** - * Retrieve the attributes for this node, and add then into - * the node. - * - * @update gess4/22/98 - * @param aNode is the node you want to collect attributes for - * @param aCount is the # of attributes you're expecting - * @return error code (should be 0) - */ -nsresult COtherDTD::CollectAttributes(nsIParserNode& aNode,eHTMLTags aTag,PRInt32 aCount){ - int attr=0; - - nsresult result=NS_OK; - int theAvailTokenCount=mTokenizer->GetCount(); - if(aCount<=theAvailTokenCount) { - //gElementTable->mElements[aTag]->GetSkipTarget(); - CToken* theToken=0; - for(attr=0;attr<aCount;++attr){ - theToken=mTokenizer->PopToken(); - if(theToken) { - // Sanitize the key for it might contain some non-alpha-non-digit characters - // at its end. Ex. <OPTION SELECTED/> - This will be tokenized as "<" "OPTION", - // "SELECTED/", and ">". In this case the "SELECTED/" key will be sanitized to - // a legitimate "SELECTED" key. 
- ((CAttributeToken*)theToken)->SanitizeKey(); - - aNode.AddAttribute(theToken); - } - } - } - else { - result=kEOF; - } - return result; -} - -/** - * This method gets called when an entity token has been - * encountered in the parse process. - * - * @update gess 3/25/98 - * @param aToken -- next (start) token to be handled - * @return PR_TRUE if all went well; PR_FALSE if error occured - */ -nsresult COtherDTD::HandleEntityToken(CToken* aToken) { - nsresult result=NS_OK; - - nsAutoString theStr; - aToken->GetSource(theStr); - PRUnichar theChar=theStr.CharAt(0); - CToken *theToken=0; - - if((kHashsign!=theChar) && (-1==nsHTMLEntities::EntityToUnicode(theStr))){ - -#ifdef DEBUG - //before we just toss this away as a bogus entity, let's check... - CNamedEntity *theEntity=mBodyContext->GetEntity(theStr); - if(theEntity) { - theToken=(CTextToken*)mTokenAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text,theEntity->mValue); - } - else { -#endif - //if you're here we have a bogus entity. - //convert it into a text token. - nsAutoString entityName; - entityName.Assign(NS_LITERAL_STRING("&")); - entityName.Append(theStr); //should append the entity name; fix bug 51161. - theToken=(CTextToken*)mTokenAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text,entityName); -#ifdef DEBUG - } -#endif - result=HandleStartToken(theToken); - } - else { - - //add this code to fix bug 42629 (entities were getting dropped). - eHTMLTags theParent=mBodyContext->Last(); - CElement* theElement=gElementTable->mElements[theParent]; - if(theElement) { - nsCParserNode theNode(aToken, 0); - result=theElement->HandleStartToken(&theNode,eHTMLTag_text,mBodyContext,mSink); - } - } - return result; -} - - /*********************************************************************************** - The preceeding tables determine the set of elements each tag can contain... 
- ***********************************************************************************/ - -/** - * This method is called to determine whether or not a tag - * of one type can contain a tag of another type. - * - * @update gess 4/8/98 - * @param aParent -- tag enum of parent container - * @param aChild -- tag enum of child container - * @return PR_TRUE if parent can contain child - */ -PRBool COtherDTD::CanContain(PRInt32 aParent,PRInt32 aChild) const { - CElement *theParent=gElementTable->mElements[eHTMLTags(aParent)]; - if(theParent) { - CElement *theChild=gElementTable->mElements[eHTMLTags(aChild)]; - if(aChild) { - if(eHTMLTag_userdefined == aChild)//bug #67007, dont strip userdefined tags - return PR_TRUE; - else - return theParent->CanContain(theChild,mBodyContext); - } - } - return PR_FALSE; -} - -/** - * Give rest of world access to our tag enums, so that CanContain(), etc, - * become useful. - */ -NS_IMETHODIMP -COtherDTD::StringTagToIntTag(const nsAString &aTag, - PRInt32* aIntTag) const -{ - *aIntTag = nsHTMLTags::LookupTag(aTag); - - return NS_OK; -} - -NS_IMETHODIMP_(const PRUnichar *) -COtherDTD::IntTagToStringTag(PRInt32 aIntTag) const -{ - const PRUnichar *str_ptr = nsHTMLTags::GetStringValue((nsHTMLTag)aIntTag); - - NS_ASSERTION(str_ptr, "Bad tag enum passed to COtherDTD::IntTagToStringTag()" - "!!"); - - return str_ptr; -} - -NS_IMETHODIMP_(nsIAtom *) -COtherDTD::IntTagToAtom(PRInt32 aIntTag) const -{ - nsIAtom *atom = nsHTMLTags::GetAtom((nsHTMLTag)aIntTag); - - NS_ASSERTION(atom, "Bad tag enum passed to COtherDTD::IntTagToAtom()" - "!!"); - - return atom; -} - -/** - * This method is called to determine whether or not - * the given childtag is a block element. 
- * - * @update gess 6June2000 - * @param aChildID -- tag id of child - * @param aParentID -- tag id of parent (or eHTMLTag_unknown) - * @return PR_TRUE if this tag is a block tag - */ -PRBool COtherDTD::IsBlockElement(PRInt32 aChildID,PRInt32 aParentID) const { - PRBool result=PR_FALSE; - - if(gElementTable) { - CElement *theElement=gElementTable->GetElement((eHTMLTags)aChildID); - result = (theElement) ? theElement->IsBlockElement((eHTMLTags)aParentID) : PR_FALSE; - } - return result; -} - -/** - * This method is called to determine whether or not - * the given childtag is an inline element. - * - * @update gess 6June2000 - * @param aChildID -- tag id of child - * @param aParentID -- tag id of parent (or eHTMLTag_unknown) - * @return PR_TRUE if this tag is an inline element - */ -PRBool COtherDTD::IsInlineElement(PRInt32 aChildID,PRInt32 aParentID) const { - PRBool result=PR_FALSE; - - if(gElementTable) { - CElement *theElement=gElementTable->GetElement((eHTMLTags)aChildID); - result = (theElement) ? theElement->IsInlineElement((eHTMLTags)aParentID) : PR_FALSE; - } - return result; -} - -/** - * This method gets called to determine whether a given - * tag is itself a container - * - * @update gess 4/8/98 - * @param aTag -- tag to test as a container - * @return PR_TRUE if given tag can contain other tags - */ -PRBool COtherDTD::IsContainer(PRInt32 aTag) const { - return gElementTable->mElements[eHTMLTags(aTag)]->IsContainer(); -} - -/** - * - * @update gess5/18/98 - * @param - * @return - */ -nsresult COtherDTD::WillResumeParse(nsIContentSink* aSink) { - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: COtherDTD::WillResumeParse(), this=%p\n", this)); - - nsresult result=(aSink) ? 
aSink->WillResume() : NS_OK; - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: COtherDTD::WillResumeParse(), this=%p\n", this)); - START_TIMER(); - - return result; -} - -/** - * This method gets called when the parsing process is interrupted - * due to lack of data (waiting for netlib). - * @update gess5/18/98 - * @return error code - */ -nsresult COtherDTD::WillInterruptParse(nsIContentSink* aSink){ - - STOP_TIMER(); - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: COtherDTD::WillInterruptParse(), this=%p\n", this)); - - nsresult result=(aSink) ? aSink->WillInterrupt() : NS_OK; - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: COtherDTD::WillInterruptParse(), this=%p\n", this)); - START_TIMER(); - - return result; -} - -// CTransitionalDTD is a subclass of COtherDTD that defaults to transitional mode. -// Used by the editor - -CTransitionalDTD::CTransitionalDTD() -{ - if (mBodyContext) mBodyContext->mFlags.mTransitional = PR_TRUE; -} - -CTransitionalDTD::~CTransitionalDTD() {} - diff --git a/htmlparser/src/COtherDTD.h b/htmlparser/src/COtherDTD.h deleted file mode 100644 index 9e4c51707c87..000000000000 --- a/htmlparser/src/COtherDTD.h +++ /dev/null @@ -1,223 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. 
- * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -/** - * MODULE NOTES: - * @update gess 7/15/98 - * - * NavDTD is an implementation of the nsIDTD interface. - * In particular, this class captures the behaviors of the original - * Navigator parser productions. - * - * This DTD, like any other in NGLayout, provides a few basic services: - * - First, the DTD collaborates with the Parser class to convert plain - * text into a sequence of HTMLTokens. - * - Second, the DTD describes containment rules for known elements. - * - Third the DTD controls and coordinates the interaction between the - * parsing system and content sink. (The content sink is the interface - * that serves as a proxy for content model). - * - Fourth the DTD maintains an internal style-stack to handle residual (leaky) - * style tags. - * - * You're most likely working in this class file because - * you want to add or change a behavior inherent in this DTD. 
The remainder - * of this section will describe what you need to do to affect the kind of - * change you want in this DTD. - * - * RESIDUAL-STYLE HANDLNG: - * There are a number of ways to represent style in an HTML document. - * 1) explicit style tags (<B>, <I> etc) - * 2) implicit styles (like those implicit in <Hn>) - * 3) CSS based styles - * - * Residual style handling results from explicit style tags that are - * not closed. Consider this example: <p>text <b>bold </p> - * When the <p> tag closes, the <b> tag is NOT automatically closed. - * Unclosed style tags are handled by the process we call residual-style - * tag handling. - * - * There are two aspects to residual style tag handling. The first is the - * construction and managing of a stack of residual style tags. The - * second is the automatic emission of residual style tags onto leaf content - * in subsequent portions of the document.This step is necessary to propagate - * the expected style behavior to subsequent portions of the document. - * - * Construction and managing the residual style stack is an inline process that - * occurs during the model building phase of the parse process. During the model- - * building phase of the parse process, a content stack is maintained which tracks - * the open container hierarchy. If a style tag(s) fails to be closed when a normal - * container is closed, that style tag is placed onto the residual style stack. If - * that style tag is subsequently closed (in most contexts), it is popped off the - * residual style stack -- and are of no further concern. - * - * Residual style tag emission occurs when the style stack is not empty, and leaf - * content occurs. In our earlier example, the <b> tag "leaked" out of the <p> - * container. Just before the next leaf is emitted (in this or another container) the - * style tags that are on the stack are emitted in succession. 
These same residual - * style tags get closed automatically when the leaf's container closes, or if a - * child container is opened. - * - * - */ -#ifndef NS_OTHERDTD__ -#define NS_OTHERDTD__ - -#include "nsIDTD.h" -#include "nsISupports.h" -#include "nsIParser.h" -#include "nsHTMLTokens.h" -#include "nsVoidArray.h" -#include "nsDeque.h" -#include "nsParserCIID.h" - -#define NS_IOTHERHTML_DTD_IID \ - {0x8a5e89c0, 0xd16d, 0x11d1, \ - {0x80, 0x22, 0x00, 0x60, 0x8, 0x14, 0x98, 0x89}} - -class nsIHTMLContentSink; -class nsIParserNode; -class nsParser; -class nsDTDContext; -class nsEntryStack; -class nsITokenizer; -class nsIParserNode; -class nsTokenAllocator; -class nsNodeAllocator; - -/*************************************************************** - Now the main event: COtherDTD. - - This not so simple class performs all the duties of token - construction and model building. It works in conjunction with - an nsParser. - ***************************************************************/ - -#ifdef _MSC_VER -#pragma warning( disable : 4275 ) -#endif - -class COtherDTD : public nsIDTD -{ - -#ifdef _MSC_VER -#pragma warning( default : 4275 ) -#endif - - public: - NS_DECL_ISUPPORTS - NS_DECL_NSIDTD - - /** - * Common constructor for navdtd. You probably want to call - * NS_NewNavHTMLDTD(). - * - * @update gess 7/9/98 - */ - COtherDTD(); - - /** - * Virtual destructor -- you know what to do - * @update gess 7/9/98 - */ - virtual ~COtherDTD(); - - /** - * The following set of methods are used to partially construct - * the content model (via the sink) according to the type of token. - * @update gess5/11/98 - * @param aToken is the token (of a given type) to be handled - * @return error code representing construction state; usually 0. 
- */ - nsresult HandleStartToken(CToken* aToken); - nsresult HandleEndToken(CToken* aToken); - nsresult HandleEntityToken(CToken* aToken); - - //************************************************* - //these cover methods mimic the sink, and are used - //by the parser to manage its context-stack. - //************************************************* - -protected: - - nsresult CollectAttributes(nsIParserNode& aNode,eHTMLTags aTag,PRInt32 aCount); - nsresult WillHandleStartTag(CToken* aToken,eHTMLTags aTag,nsIParserNode& aNode); - nsresult DidHandleStartTag(nsIParserNode& aNode,eHTMLTags aChildTag); - nsIParserNode* CreateNode(CToken* aToken=nsnull,PRInt32 aLineNumber=1,nsTokenAllocator* aTokenAllocator=0); - - nsIHTMLContentSink* mSink; - - nsDTDContext* mBodyContext; - PRInt32 mHasOpenHead; - PRPackedBool mHasOpenForm; - PRPackedBool mHasOpenMap; - PRPackedBool mHasOpenBody; - PRPackedBool mHadFrameset; - PRPackedBool mHadBody; - PRPackedBool mHasOpenScript; - PRPackedBool mEnableStrict; - nsString mFilename; - PRInt32 mLineNumber; - nsParser* mParser; - nsITokenizer* mTokenizer; // weak - nsTokenAllocator* mTokenAllocator; - nsNodeAllocator* mNodeAllocator; - eHTMLTags mSkipTarget; - nsresult mDTDState; - nsDTDMode mDTDMode; - eParserCommands mParserCommand; //tells us to viewcontent/viewsource/viewerrors... 
- - PRUint32 mComputedCRC32; - PRUint32 mExpectedCRC32; - nsString mScratch; //used for various purposes; non-persistent - eParserDocType mDocType; - -}; - -extern nsresult NS_NewOtherHTMLDTD(nsIDTD** aInstancePtrResult); - - -class CTransitionalDTD : public COtherDTD -{ - public: - CTransitionalDTD(); - virtual ~CTransitionalDTD(); -}; - -#endif //NS_OTHERDTD__ - - - diff --git a/htmlparser/src/COtherElements.h b/htmlparser/src/COtherElements.h deleted file mode 100644 index 7d40be9428a2..000000000000 --- a/htmlparser/src/COtherElements.h +++ /dev/null @@ -1,2926 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * rickg@netscape.com - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -/************************************************************************ - * MODULE NOTES: - * @update gess 04.08.2000 - * - * - CElement::mAutoClose should only be set for tags whose end tag - * is optional. - * - * - ************************************************************************/ - -#ifndef _COTHERELEMENTS_ -#define _COTHERELEMENTS_ - -#include "nsDTDUtils.h" - -/************************************************************************ - This union is a bitfield which describes the group membership - ************************************************************************/ - - -struct CGroupBits { - PRUint32 mHead: 1; - PRUint32 mHeadMisc: 1; //script, style, meta, link, object - PRUint32 mHeadContent: 1; //title, base - PRUint32 mFontStyle : 1; - PRUint32 mPhrase: 1; - PRUint32 mSpecial: 1; - PRUint32 mFormControl: 1; - PRUint32 mHeading: 1; - PRUint32 mBlock: 1; - PRUint32 mFrame:1; - PRUint32 mList: 1; - PRUint32 mPreformatted: 1; - PRUint32 mTable: 1; - PRUint32 mSelf: 1; - PRUint32 mLeaf: 1; - PRUint32 mWhiteSpace: 1; - PRUint32 mComment: 1; - PRUint32 mTextContainer: 1; - PRUint32 mTopLevel: 1; - PRUint32 mDTDInternal: 1; - PRUint32 mFlowEntity: 1; - PRUint32 mBlockEntity: 1; - PRUint32 mInlineEntity: 1; -}; - -union CGroupMembers { - PRUint32 mAllBits; - CGroupBits mBits; -}; - - -inline PRBool ContainsGroup(CGroupMembers& aGroupSet,CGroupMembers& aGroup) { - PRBool result=PR_FALSE; - if(aGroup.mAllBits) { 
- result=(aGroupSet.mAllBits & aGroup.mAllBits) ? PR_TRUE : PR_FALSE; - } - return result; -} - -inline PRBool ListContainsTag(const eHTMLTags* aTagList,eHTMLTags aTag) { - if(aTagList) { - const eHTMLTags *theNextTag=aTagList; - while(eHTMLTag_unknown!=*theNextTag) { - if(aTag==*theNextTag) { - return PR_TRUE; - } - ++theNextTag; - } - } - return PR_FALSE; -} - - -/********************************************************** - Begin with the baseclass for all elements... - **********************************************************/ -class CElement { -public: - - //break this struct out separately so that lame compilers don't gack. - struct CFlags { - PRUint32 mOmitEndTag:1; - PRUint32 mIsContainer:1; - PRUint32 mIsSinkContainer:1; - PRUint32 mDeprecated:1; - PRUint32 mOmitWS:1; - }; - - union { - PRUint32 mAllBits; - CFlags mProperties; - }; - - CElement(eHTMLTags aTag=eHTMLTag_unknown) { - mAllBits=0; - mTag=aTag; - mGroup.mAllBits=0; - mContainsGroups.mAllBits=0; - mAutoClose=mIncludeKids=mExcludeKids=0; - mDelegate=eHTMLTag_unknown; - } - - CElement( eHTMLTags aTag,CGroupMembers& aGroup) { - mAllBits=0; - mTag=aTag; - mGroup=aGroup; - mContainsGroups.mAllBits=0; - mAutoClose=mIncludeKids=mExcludeKids=0; - mDelegate=eHTMLTag_unknown; - } - - static CGroupMembers& GetEmptyGroup(void) { - static CGroupMembers theGroup={0}; - return theGroup; - } - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - anElement.mProperties.mIsContainer=0; - anElement.mProperties.mIsSinkContainer=0; - anElement.mTag=aTag; - anElement.mGroup.mAllBits=0;; - anElement.mContainsGroups.mAllBits=0; - } - - static void InitializeLeaf(CElement& anElement,eHTMLTags aTag,CGroupMembers& aGroup,CGroupMembers& aContainsGroups) { - anElement.mProperties.mIsContainer=PR_FALSE; - anElement.mProperties.mIsSinkContainer=PR_FALSE; - anElement.mTag=aTag; - anElement.mGroup.mAllBits=aGroup.mAllBits; - anElement.mContainsGroups.mAllBits=aContainsGroups.mAllBits; - } - - static void 
Initialize(CElement& anElement,eHTMLTags aTag,CGroupMembers& aGroup,CGroupMembers& aContainsGroups) { - anElement.mProperties.mIsContainer=PR_TRUE; - anElement.mProperties.mIsSinkContainer=PR_TRUE; - anElement.mTag=aTag; - anElement.mGroup.mAllBits=aGroup.mAllBits; - anElement.mContainsGroups.mAllBits=aContainsGroups.mAllBits; - } - - inline CElement* GetDelegate(void); - inline CElement* GetDefaultContainerFor(CElement* anElement); - - virtual PRBool CanContain(CElement* anElement,nsDTDContext* aContext); - virtual PRInt32 FindAutoCloseIndexForStartTag(CElement* anElement,PRInt32 aParentIndex,nsDTDContext* aContext); - virtual PRBool CanBeClosedByEndTag(CElement* anElement,nsDTDContext* aContext); - - //This tells us whether this tag can potentially close other blocks. - //That DOES NOT mean that this tag is necessarily a block itself (condsider TBODY,TR,TD...) - virtual PRBool IsBlockCloser(void) { - PRBool result=IsBlockElement(eHTMLTag_body); - if(!result) { - if(IsInlineElement(eHTMLTag_body) || - mGroup.mBits.mHead || - mGroup.mBits.mHeadMisc || - mGroup.mBits.mFormControl || - mGroup.mBits.mFrame || - mGroup.mBits.mLeaf || - mGroup.mBits.mComment || - mGroup.mBits.mTextContainer || - mGroup.mBits.mWhiteSpace) - result=PR_FALSE; - else result=PR_TRUE; - } - return result; - } - - //this tells us whether this tag is a block tag within the given parent - virtual PRBool IsBlockElement(eHTMLTags aParentID); - - //this tells us whether this tag is an inline tag within the given parent - //NOTE: aParentID is currently ignored, but shouldn't be. - virtual PRBool IsInlineElement(eHTMLTags aParentID); - - //this tells us whether the tag is a container as defined by HTML - //NOTE: aParentID is currently ignored, but shouldn't be. - virtual PRBool IsContainer(void) {return mProperties.mIsContainer; } - - //this tells us whether the tag should be opened as a container in the sink (script doesn't, for example). 
- virtual PRBool IsSinkContainer(void) { return mProperties.mIsSinkContainer; } - - virtual eHTMLTags GetSkipTarget(void) {return eHTMLTag_unknown;} - - - virtual nsresult WillHandleStartToken( CElement* anElement, - nsIParserNode* aNode, - eHTMLTags aTag, - nsDTDContext* aContext, - nsIHTMLContentSink* aSink); - - virtual nsresult HandleStartToken( nsCParserNode* aNode, - eHTMLTags aTag, - nsDTDContext* aContext, - nsIHTMLContentSink* aSink); - - virtual nsresult HandleEndToken(nsCParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink); - - virtual nsresult HandleMisplacedStartToken(nsCParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - return result; - } - - virtual PRInt32 FindAutoCloseTargetForEndTag(nsCParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink, PRInt32& anIndex) { - PRInt32 result=-1; - - if(mTag!=aTag) { - if(HasOptionalEndTag(mTag) && (0<anIndex)) { - eHTMLTags theGrandParentTag=aContext->TagAt(--anIndex); - CElement *theGrandParent=GetElement(theGrandParentTag); - if(theGrandParent) { - result=theGrandParent->FindAutoCloseTargetForEndTag(aNode,aTag,aContext,aSink,anIndex); //give the parent a chance... 
- } - } - } - else result=anIndex; - - return result; - } - - virtual nsresult HandleMisplacedEndToken(nsIParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - return result; - } - - nsresult AutoGenerateStructure(eHTMLTags *aTagList,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - - CStartToken theToken(*aTagList); - nsCParserNode theNode(&theToken, 0 /*stack token*/); - result=OpenContainer(&theNode,*aTagList,aContext,aSink); - if(eHTMLTag_unknown!=*(aTagList+1)) { - AutoGenerateStructure(++aTagList,aContext,aSink); - } - - CEndToken theEndToken(*aTagList--); - nsCParserNode theEndNode(&theEndToken, 0 /*stack token*/); - result=CloseContainer(&theEndNode,*aTagList,aContext,aSink); - - return result; - } - - - /********************************************************** - Call this for each element as it get's opened on the stack - **********************************************************/ - virtual nsresult NotifyOpen(nsIParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - return NS_OK; - } - - /********************************************************** - Call this for each element as it get's closed - **********************************************************/ - virtual nsresult NotifyClose(nsIParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - return NS_OK; - } - - /********************************************************** - this gets called after each tag is opened in the given context - **********************************************************/ - virtual nsresult OpenContainer(nsIParserNode *aNode,eHTMLTags aTag,nsDTDContext *aContext,nsIHTMLContentSink *aSink) { - return aSink->OpenContainer(*aNode); - } - - /********************************************************** - this gets called after each tag is opened in the given context - **********************************************************/ - virtual 
nsresult OpenContext(nsCParserNode *aNode,eHTMLTags aTag,nsDTDContext *aContext,nsIHTMLContentSink *aSink) { - aContext->Push(aNode, 0, PR_FALSE); - CElement *theElement = (aTag == mTag) ? this : GetElement(aTag); - theElement->NotifyOpen(aNode, aTag, aContext,aSink); - return NS_OK; - } - - /********************************************************** - this gets called after each tag is opened in the given context - **********************************************************/ - virtual nsresult OpenContainerInContext(nsCParserNode *aNode,eHTMLTags aTag,nsDTDContext *aContext,nsIHTMLContentSink *aSink) { - OpenContext(aNode,aTag,aContext,aSink); - return OpenContainer(aNode,aTag,aContext,aSink); - } - - /********************************************************** - this gets called to close a given tag in the sink - **********************************************************/ - virtual nsresult CloseContainer(nsIParserNode *aNode,eHTMLTags aTag,nsDTDContext *aContext,nsIHTMLContentSink *aSink) { - return aSink->CloseContainer(aTag); - } - - /********************************************************** - this gets called to close a tag in the given context - **********************************************************/ - virtual nsresult CloseContext(nsCParserNode *aNode,eHTMLTags aTag,nsDTDContext *aContext,nsIHTMLContentSink *aSink) { - nsresult result=NS_OK; - nsEntryStack *theStack=0; - nsCParserNode *theNode=aContext->Pop(theStack); - - CElement *theElement=(aTag==mTag) ? 
this : GetElement(aTag); - result=theElement->NotifyClose(theNode,aTag,aContext,aSink); - - IF_FREE(aNode, aContext->mNodeAllocator); - return result; - } - - /********************************************************** - this gets called to close a tag in the sink and in the context - **********************************************************/ - virtual nsresult CloseContainerInContext(nsCParserNode *aNode,eHTMLTags aTag,nsDTDContext *aContext,nsIHTMLContentSink *aSink) { - nsresult result=NS_OK; - if(mTag!=aTag) { - CElement *theElement=GetElement(aTag); - return theElement->CloseContainerInContext(aNode,aTag,aContext,aSink); - } - result=CloseContainer(aNode,aTag,aContext,aSink); - CloseContext(aNode,aTag,aContext,aSink); - return result; - } - - - CElement* GetElement(eHTMLTags aTag); - - eHTMLTags mTag; - eHTMLTags mDelegate; - CGroupMembers mGroup; - CGroupMembers mContainsGroups; - const eHTMLTags *mIncludeKids; - const eHTMLTags *mExcludeKids; - const eHTMLTags *mAutoClose; //other start tags that close this container -}; - - -/********************************************************** - This defines the Special element group - **********************************************************/ -class CLeafElement: public CElement { -public: - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theGroup={0}; - theGroup.mBits.mLeaf=1; - return theGroup; - } - - static CGroupMembers& GetContainedGroups(void) { - static CGroupMembers theGroups={0}; - return theGroups; - } - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::InitializeLeaf(anElement,aTag,GetGroup(),GetContainedGroups()); - } - - - CLeafElement(eHTMLTags aTag) : CElement(aTag) { - mProperties.mIsContainer=0; - } - -}; - -/********************************************************** - This defines elements that are deprecated - **********************************************************/ -class CDeprecatedElement: public CElement { -public: - - static void 
Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::Initialize(anElement,aTag); - anElement.mProperties.mDeprecated=1; - } - - CDeprecatedElement(eHTMLTags aTag) : CElement(aTag) { - CDeprecatedElement::Initialize(*this,aTag); - } - -}; - -/********************************************************** - This defines elements that are for use only by the DTD - **********************************************************/ -class CInlineElement: public CElement { -public: - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theGroup={0}; - theGroup.mBits.mInlineEntity=1; - return theGroup; - } - - static CGroupMembers& GetContainedGroups(void) { - static CGroupMembers theGroup={0}; - static PRBool initialized=PR_FALSE; - if(!initialized) { - initialized=PR_TRUE; - theGroup.mBits.mFormControl=1; - theGroup.mBits.mFontStyle =1; - theGroup.mBits.mPhrase=1; - theGroup.mBits.mSpecial=1; - theGroup.mBits.mList=0; //intentionally remove list from inline group - theGroup.mBits.mPreformatted=0; - theGroup.mBits.mSelf=1; - theGroup.mBits.mLeaf=1; - theGroup.mBits.mWhiteSpace=1; - theGroup.mBits.mComment=1; - theGroup.mBits.mInlineEntity=1; - } - return theGroup; - } - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::Initialize(anElement,aTag,GetGroup(),GetContainedGroups()); - } - - - - CInlineElement(eHTMLTags aTag) : CElement(aTag) { - CInlineElement::Initialize(*this,aTag); - } - -}; - -/********************************************************** - This defines the Block element group - **********************************************************/ -class CBlockElement : public CElement { -public: - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theBlockGroup={0}; - theBlockGroup.mBits.mBlock=1; - return theBlockGroup; - } - - /********************************************************** - by default,members of the block group contain inline children - **********************************************************/ - static 
CGroupMembers& GetContainedGroups(PRBool aCanContainSelf = PR_TRUE) { - static CGroupMembers theGroups=CInlineElement::GetContainedGroups(); - theGroups.mBits.mSelf=aCanContainSelf; - return theGroups; - } - - /********************************************************** - call this if you want a group that contains only block elements... - **********************************************************/ - static CGroupMembers& GetBlockGroupMembers(void) { - static CGroupMembers theGroups={0}; - theGroups.mBits.mBlock=1; - theGroups.mBits.mSelf=1; - return theGroups; - } - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::Initialize(anElement,aTag,GetGroup(),GetContainedGroups()); - } - - CBlockElement(eHTMLTags aTag) : CElement(aTag) { - CBlockElement::Initialize(*this,aTag); - } - -}; - - -/************************************************************ - This defines flowEntity elements that contain block+inline - ************************************************************/ -class CFlowElement: public CInlineElement { -public: - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theGroup={0}; - theGroup.mBits.mFlowEntity=1; - return theGroup; - } - - static CGroupMembers& GetContainedGroups(void) { - static CGroupMembers theGroup={0}; - theGroup=CInlineElement::GetContainedGroups(); - theGroup.mBits.mBlock=1; - theGroup.mBits.mBlockEntity=1; - return theGroup; - } - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::Initialize(anElement,aTag,GetGroup(),GetContainedGroups()); - } - - CFlowElement(eHTMLTags aTag) : CInlineElement(aTag) { - CFlowElement::Initialize(*this,aTag); - } - -}; - -/********************************************************** - This defines the Phrase element group - **********************************************************/ -class CPhraseElement: public CElement { -public: - - static CGroupMembers& GetGroup(void) { - static CGroupMembers thePhraseGroup={0}; - 
thePhraseGroup.mBits.mPhrase=1; - return thePhraseGroup; - } - - static CGroupMembers& GetContainedGroups(void) { - static CGroupMembers theGroups=CInlineElement::GetContainedGroups(); - return theGroups; - } - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::Initialize(anElement,aTag,GetGroup(),GetContainedGroups()); - } - - CPhraseElement(eHTMLTags aTag) : CElement(aTag) { - CPhraseElement::Initialize(*this,aTag); - } - -}; - -/********************************************************** - This defines the formcontrol element group - **********************************************************/ -class CFormControlElement: public CElement { -public: - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theGroup={0}; - theGroup.mBits.mFormControl=1; - return theGroup; - } - - static CGroupMembers& GetContainedGroups(void) { - static CGroupMembers theGroup={0}; - theGroup.mBits.mFormControl=1; - theGroup.mBits.mLeaf=1; - theGroup.mBits.mWhiteSpace=1; - return theGroup; - } - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::Initialize(anElement,aTag,GetGroup(),GetContainedGroups()); - } - - CFormControlElement(eHTMLTags aTag) : CElement(aTag) { - CFormControlElement::Initialize(*this,aTag); - } - -}; - -/********************************************************** - This defines the form element itself - **********************************************************/ -class CFormElement: public CBlockElement { -public: - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::Initialize(anElement,aTag,CBlockElement::GetGroup(),CBlockElement::GetBlockGroupMembers()); - } - - CFormElement() : CBlockElement(eHTMLTag_form) { - CFormElement::Initialize(*this,eHTMLTag_form); - mContainsGroups.mBits.mSelf=0; - mContainsGroups.mBits.mFormControl=1; - } - - virtual PRBool CanContain(CElement* anElement,nsDTDContext* aContext) { - PRBool result=CElement::CanContain(anElement,aContext); - if((!result) && 
(aContext->mFlags.mTransitional)) { - - //If we're in transitional mode, then also allow inline elements... - - CGroupMembers& theFlowGroup=CFlowElement::GetContainedGroups(); - result=ContainsGroup(theFlowGroup,anElement->mGroup); - } - return result; - } - - /********************************************************** - this gets called after each tag is opened in the given context - **********************************************************/ - virtual nsresult OpenContainer(nsIParserNode *aNode,eHTMLTags aTag,nsDTDContext *aContext,nsIHTMLContentSink *aSink) { - nsresult result=aSink->OpenForm(*aNode); - return result; - } - - -}; - -/********************************************************** - This defines the fontstyle element group - **********************************************************/ -class CFontStyleElement: public CElement { -public: - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theGroup={0}; - theGroup.mBits.mFontStyle=1; - return theGroup; - } - - static CGroupMembers& GetContainedGroups(void) { - static CGroupMembers theGroups=CInlineElement::GetContainedGroups(); - return theGroups; - } - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::Initialize(anElement,aTag,GetGroup(),GetContainedGroups()); - } - - CFontStyleElement(eHTMLTags aTag) : CElement(aTag) { - CFontStyleElement::Initialize(*this,aTag); - } - -}; - - -/********************************************************** - This defines the special-inline element group - **********************************************************/ -class CSpecialElement : public CElement { -public: - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theGroup={0}; - theGroup.mBits.mSpecial=1; - return theGroup; - } - - static CGroupMembers& GetContainedGroups(void) { - static CGroupMembers theGroups=CInlineElement::GetContainedGroups(); - return theGroups; - } - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - 
CElement::Initialize(anElement,aTag,GetGroup(),GetContainedGroups()); - } - - CSpecialElement(eHTMLTags aTag) : CElement(aTag) { - CSpecialElement::Initialize(*this,aTag); - } - -}; - - - -/********************************************************** - This defines the Table block itself, not it's children. - **********************************************************/ - -class CTableElement: public CElement { -public: - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theTableGroup={0}; - theTableGroup.mBits.mTable=1; - return theTableGroup; - } - - static CGroupMembers& GetContainedGroups(void) { - static CGroupMembers theGroups={0}; - theGroups.mBits.mTable=1; - return theGroups; - } - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::Initialize(anElement,aTag,GetGroup(),GetContainedGroups()); - } - - CTableElement(eHTMLTags aTag=eHTMLTag_table) : CElement(aTag) { - CElement::Initialize(*this,aTag,CBlockElement::GetGroup(),CTableElement::GetContainedGroups()); - } - - PRBool CanContain(CElement* anElement,nsDTDContext* aContext) { - PRBool result=PR_FALSE; - - switch(anElement->mTag) { - - case eHTMLTag_caption: - result=(aContext->mTableStates && aContext->mTableStates->CanOpenCaption()); - break; - - case eHTMLTag_colgroup: - result=(aContext->mTableStates && aContext->mTableStates->CanOpenCols()); - break; - - case eHTMLTag_thead: //nothing to do for these empty tags... - result=(aContext->mTableStates && aContext->mTableStates->CanOpenTHead()); - break; - - case eHTMLTag_tfoot: - result=(aContext->mTableStates && aContext->mTableStates->CanOpenTFoot()); - break; - - case eHTMLTag_tr: - case eHTMLTag_th: - result=(aContext->mTableStates && aContext->mTableStates->CanOpenTBody()); - break; - - default: - result=CElement::CanContain(anElement,aContext); - break; - } - return result; - } - - /********************************************************** - Table needs to be notified so it can manage table states. 
- **********************************************************/ - virtual nsresult NotifyOpen(nsIParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - aContext->mTableStates=new CTableState(aContext->mTableStates); //create and prepend a new state - return NS_OK; - } - - /********************************************************** - Table needs to be notified so it can manage table states. - **********************************************************/ - virtual nsresult NotifyClose(nsIParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - - nsresult result=NS_OK; - if(aContext->mTableStates) { - - if(!aContext->mTableStates->mHasTBody) { - //so let's open a tbody, a TR and a TD for good measure... - - eHTMLTags theTags[]={eHTMLTag_tbody,eHTMLTag_tr,eHTMLTag_td,eHTMLTag_unknown}; - AutoGenerateStructure(theTags,aContext,aSink); - } - - //pop the current state and restore it's predecessor, if any... - CTableState *theState=aContext->mTableStates; - aContext->mTableStates=theState->mPrevious; - delete theState; - } - return result; - } - - /********************************************************** - Table handles the opening of it's own children - **********************************************************/ - virtual nsresult HandleStartToken(nsCParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - - switch(aTag) { - - case eHTMLTag_caption: - if(aContext->mTableStates && aContext->mTableStates->CanOpenCaption()) { - result=OpenContainerInContext(aNode,aTag,aContext,aSink); //force the title onto the stack - } - break; - - case eHTMLTag_col: - result=aSink->AddLeaf(*aNode); - break; - - case eHTMLTag_colgroup: - if(aContext->mTableStates && aContext->mTableStates->CanOpenCols()) { - result=OpenContainerInContext(aNode,aTag,aContext,aSink); //force the title onto the stack - } - break; - - case eHTMLTag_thead: //nothing to do for these empty tags... 
- if(aContext->mTableStates && aContext->mTableStates->CanOpenTHead()) { - aContext->mTableStates->mHasTHead=PR_TRUE; - result=OpenContainerInContext(aNode,aTag,aContext,aSink); //force the title onto the stack - } - break; - - case eHTMLTag_tbody: - aContext->mTableStates->mHasTBody=PR_TRUE; - result=OpenContainerInContext(aNode,aTag,aContext,aSink); //force the title onto the stack - break; - - case eHTMLTag_tfoot: - if(aContext->mTableStates && aContext->mTableStates->CanOpenTFoot()) { - aContext->mTableStates->mHasTFoot=PR_TRUE; - result=OpenContainerInContext(aNode,aTag,aContext,aSink); //force the title onto the stack - } - break; - - case eHTMLTag_tr: - case eHTMLTag_th: - - if(aContext->mTableStates) { - if(aContext->mTableStates->CanOpenTBody()) { - CToken* theToken=(CStartToken*)aContext->mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_tbody); - nsCParserNode* theNode=aContext->mNodeAllocator->CreateNode(theToken, 0); - - result=HandleStartToken(theNode,eHTMLTag_tbody,aContext,aSink); - } - if(NS_SUCCEEDED(result)) { - CElement *theElement=GetElement(eHTMLTag_tbody); - if(theElement) { - result=theElement->HandleStartToken(aNode,aTag,aContext,aSink); - } - } - } - - break; - - default: - break; - } - return result; - } - - /********************************************************** - Table handles the closing of it's own children - **********************************************************/ - virtual nsresult HandleEndToken(nsCParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - - if(aContext->HasOpenContainer(aTag)) { - switch(aTag) { - case eHTMLTag_caption: - case eHTMLTag_col: - case eHTMLTag_colgroup: - case eHTMLTag_tr: - case eHTMLTag_thead: - case eHTMLTag_tfoot: - case eHTMLTag_tbody: - result=CloseContainerInContext(aNode,aTag,aContext,aSink); //force the title onto the stack - break; - - default: - break; - } //switch - } //if - - - return result; - } - - 
/********************************************************** - If you're here, then children below you have optional - end tags, can't deal with the given tag, and want you - to handle it. - **********************************************************/ - virtual PRInt32 FindAutoCloseTargetForEndTag(nsIParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink, PRInt32& anIndex) { - - // XXXldb This method is completely unused because the |aNode| - // parameter is declared as |nsIParserNode| rather than - // |nsCParserNode| so it doesn't override the member function of - // CElement. - NS_NOTREACHED("This isn't used. Should it be?"); - - PRInt32 result=kNotFound; - - switch(aTag) { - case eHTMLTag_table: - case eHTMLTag_caption: - case eHTMLTag_col: - case eHTMLTag_colgroup: - case eHTMLTag_thead: - case eHTMLTag_tfoot: - case eHTMLTag_tbody: - case eHTMLTag_tr: - case eHTMLTag_td: - { - PRInt32 theTablePos=aContext->LastOf(eHTMLTag_table); - PRInt32 theTagPos=aContext->LastOf(aTag); - if((kNotFound!=theTagPos) && (theTablePos<=theTagPos)) { - result=theTagPos; - } - } - break; - - default: - break; - } //switch - - return result; - } - -}; - -/********************************************************** - This defines the Table block itself, not it's children. 
- **********************************************************/ - -class CTableRowElement: public CElement { -public: - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::Initialize(anElement,aTag,CTableElement::GetGroup(),CElement::GetEmptyGroup()); - - static eHTMLTags kTRKids[]={eHTMLTag_td,eHTMLTag_th,eHTMLTag_unknown}; - anElement.mIncludeKids=kTRKids; - } - - CTableRowElement(eHTMLTags aTag=eHTMLTag_tr) : CElement(aTag) { - CTableRowElement::Initialize(*this,aTag); - mContainsGroups.mBits.mSelf=0; - } - - virtual nsresult HandleEndTokenForChild(CElement *aChild,nsIParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - return result; - } - -}; - -/********************************************************** - This defines the List element group (ol,ul,dir,menu) - **********************************************************/ -class CListElement: public CElement { -public: - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theListGroup={0}; - theListGroup.mBits.mList=1; - return theListGroup; - } - - static CGroupMembers& GetContainedGroups(void) { - static CGroupMembers theGroups=CInlineElement::GetContainedGroups(); - return theGroups; - } - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::Initialize(anElement,aTag,GetGroup(),GetContainedGroups()); - } - - CListElement(eHTMLTags aTag) : CElement(aTag) { - CListElement::Initialize(*this,aTag); - } - -}; - -/********************************************************** - This defines the LI element... - - An interesting problem here is that LI normally contains - Block+inline, unless it's inside a MENU or DIR, in which - case it contains only inline. 
- **********************************************************/ -class CLIElement: public CElement { -public: - - CLIElement(eHTMLTags aTag=eHTMLTag_li) : CElement(aTag) { - CFlowElement::Initialize(*this,aTag); - mGroup.mAllBits=0; - mGroup.mBits.mList=1; - } - -}; - -/********************************************************** - This defines the counter element, and is for debug use. - - Usage: <counter name="xxx" reset=n> - - if you leave off the name key/value pair, we'll use the - name of the element instead. - **********************************************************/ -class CCounterElement: public CInlineElement { -public: - - CCounterElement(eHTMLTags aTag=eHTMLTag_counter) : CInlineElement(aTag) { - CInlineElement::Initialize(*this,aTag); - mProperties.mIsSinkContainer=PR_FALSE; - } - - /********************************************************** - handles the opening of it's own children - **********************************************************/ - virtual nsresult HandleStartToken( nsCParserNode* aNode, - eHTMLTags aTag, - nsDTDContext* aContext, - nsIHTMLContentSink* aSink) { - return CElement::HandleStartToken(aNode,aTag,aContext,aSink); - } - - - /********************************************************** - this gets called after each tag is opened in the given context - **********************************************************/ - virtual nsresult OpenContext(nsCParserNode *aNode,eHTMLTags aTag,nsDTDContext *aContext,nsIHTMLContentSink *aSink) { - CElement::OpenContext(aNode,aTag,aContext,aSink); - - nsresult result=NS_OK; - PRInt32 theCount=aContext->GetCount(); - - nsCParserNode *theNode = (nsCParserNode*)aNode; - -#ifdef DEBUG - eHTMLTags theGrandParentTag=aContext->TagAt(theCount-2); - nsAutoString theNumber; - aContext->IncrementCounter(theGrandParentTag,*theNode,theNumber); - - CTextToken theToken(theNumber); - nsCParserNode theNewNode(&theToken, 0 /*stack token*/); - *theNode = theNewNode; -#endif - result=aSink->AddLeaf(*theNode); - return 
result; - } - - /********************************************************** - handles the opening of it's own children - **********************************************************/ - virtual nsresult HandleEndToken(nsCParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - return CElement::HandleEndToken(aNode,aTag,aContext,aSink); - } - -}; - -/********************************************************** - This defines the heading element group (h1..h6) - **********************************************************/ -class CHeadingElement: public CElement { -public: - - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theGroup={0}; - theGroup.mBits.mHeading=1; - return theGroup; - } - - static CGroupMembers& GetContainedGroups(void) { - static CGroupMembers theGroups=CInlineElement::GetContainedGroups(); - return theGroups; - } - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::Initialize(anElement,aTag,GetGroup(),GetContainedGroups()); - } - - CHeadingElement(eHTMLTags aTag) : CElement(aTag) { - CHeadingElement::Initialize(*this,aTag); - } - -}; - -/********************************************************** - This defines the tags that relate to frames - **********************************************************/ -class CFrameElement: public CElement { -public: - - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theGroup={0}; - theGroup.mBits.mFrame=1; - return theGroup; - } - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - anElement.mProperties.mIsContainer=1; - anElement.mProperties.mIsSinkContainer=1; - anElement.mTag=aTag; - anElement.mGroup.mAllBits=0; - anElement.mGroup.mBits.mFrame=1; - anElement.mContainsGroups.mAllBits=0; - anElement.mContainsGroups.mBits.mFrame=1; - anElement.mContainsGroups.mBits.mSelf=1; - } - - CFrameElement(eHTMLTags aTag) : CElement(aTag) { - CFrameElement::Initialize(*this,aTag); - } - -}; - - 
-/********************************************************** - This defines elements that are for use only by the DTD - **********************************************************/ -class CDTDInternalElement: public CElement { -public: - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - anElement.mProperties.mIsContainer=1; - anElement.mTag=aTag; - anElement.mContainsGroups.mAllBits=0; - anElement.mGroup.mBits.mDTDInternal=1; - } - - CDTDInternalElement(eHTMLTags aTag) : CElement(aTag) { - CDTDInternalElement::Initialize(*this,aTag); - } - - -}; - -/********************************************************** - Here comes the head element - **********************************************************/ -class CHeadElement: public CElement { -public: - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theHeadGroup={0}; - theHeadGroup.mBits.mTopLevel=1; - return theHeadGroup; - } - - static CGroupMembers& GetContentGroup(void) { - static CGroupMembers theGroup={0}; - theGroup.mBits.mHeadContent=1; - return theGroup; - } - - static CGroupMembers& GetMiscGroup(void) { - static CGroupMembers theGroup={0}; - theGroup.mBits.mHeadMisc=1; - return theGroup; - } - - static CGroupMembers& GetContainedGroups(void) { - static CGroupMembers theGroupsContainedByHead={0}; - theGroupsContainedByHead.mBits.mHeadMisc=1; - theGroupsContainedByHead.mBits.mHeadContent=1; - return theGroupsContainedByHead; - } - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::Initialize(anElement,aTag,GetGroup(),GetContainedGroups()); - - static eHTMLTags kHeadKids[]={eHTMLTag_isindex,eHTMLTag_unknown}; - - anElement.mIncludeKids=kHeadKids; - } - - virtual nsresult OpenContext(nsIParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - - // XXXldb This method is completely unused because the |aNode| - // parameter is declared as |nsIParserNode| rather than - // |nsCParserNode| so it doesn't override the member function of - // 
CElement. - NS_NOTREACHED("This isn't used. Should it be?"); - - NS_ASSERTION(aContext!=nsnull,"cannot make a decision without a context"); - - nsresult result=NS_OK; - if(aSink && aContext) { - if(aContext->mFlags.mHasOpenHead==PR_FALSE) { - result=aSink->OpenHead(*aNode); - aContext->mFlags.mHasOpenHead=PR_TRUE; - } - } - return result; - } - - virtual nsresult CloseContext(nsIParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - - // XXXldb This method is completely unused because the |aNode| - // parameter is declared as |nsIParserNode| rather than - // |nsCParserNode| so it doesn't override the member function of - // CElement. - NS_NOTREACHED("This isn't used. Should it be?"); - - NS_ASSERTION(aContext!=nsnull,"cannot make a decision without a context"); - - nsresult result=NS_OK; - if(aSink && aContext) { - if(aContext->mFlags.mHasOpenHead==PR_TRUE) { - result = aSink->CloseHead(); - aContext->mFlags.mHasOpenHead=PR_FALSE; - } - } - return result; - } - - CHeadElement(eHTMLTags aTag) : CElement(aTag) { - CHeadElement::Initialize(*this,aTag); - } -}; - - -/********************************************************** - This class is for use with title, script, style - **********************************************************/ -class CTextContainer : public CElement { -public: - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theGroup={0}; - theGroup.mBits.mTextContainer=1; - return theGroup; - } - - static CGroupMembers& GetContainedGroups(void) { - static CGroupMembers theContainedGroups={0}; - theContainedGroups.mBits.mLeaf=1; - return theContainedGroups; - } - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::Initialize(anElement,aTag,GetGroup(),GetContainedGroups()); - } - - CTextContainer(eHTMLTags aTag) : CElement(aTag) { - CTextContainer::Initialize(*this,aTag); - } - - virtual ~CTextContainer() { - } - - /********************************************************** - Call this for 
each element as it get's closed - **********************************************************/ - virtual nsresult NotifyClose(nsIParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - if(aNode) { -#if 0 - CStartToken theToken(aTag); - nsCParserNode theNode(&theToken); - theNode.SetSkippedContent(mText); - result=aSink->AddLeaf(theNode); -#endif - nsCParserNode *theNode=(nsCParserNode*)aNode; - //theNode->SetSkippedContent(mText); XXX why do we need this? - result=aSink->AddLeaf(*theNode); - } - mText.Truncate(0); - return result; - } - - /********************************************************** - Textcontainer handles the opening of it's own children - **********************************************************/ - virtual nsresult HandleStartToken(nsIParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - - // XXXldb This method is completely unused because the |aNode| - // parameter is declared as |nsIParserNode| rather than - // |nsCParserNode| so it doesn't override the member function of - // CElement. - NS_NOTREACHED("This isn't used. Should it be?"); - - nsresult result=NS_OK; - - switch(aTag) { - case eHTMLTag_text: - case eHTMLTag_whitespace: - mText.Append(aNode->GetText()); - break; - default: - break; - } - - return result; - } - - virtual nsresult HandleEndToken(nsIParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - - // XXXldb This method is completely unused because the |aNode| - // parameter is declared as |nsIParserNode| rather than - // |nsCParserNode| so it doesn't override the member function of - // CElement. - NS_NOTREACHED("This isn't used. 
Should it be?"); - - nsresult result=NS_OK; - return result; - } - - nsString mText; -}; - -/********************************************************** - This class is for the title element - **********************************************************/ -class CTitleElement : public CTextContainer { -public: - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CTextContainer::Initialize(anElement,aTag); - } - - CTitleElement() : CTextContainer(eHTMLTag_title) { - mGroup.mBits.mHeadMisc=1; - } - - /********************************************************** - Call this for each element as it get's closed - **********************************************************/ - virtual nsresult NotifyClose(nsCParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - - // XXXldb This method is completely unused because the |aNode| - // parameter is declared as |nsCParserNode| rather than - // |nsIParserNode| so it doesn't override the member function of - // CTextContainer. - NS_NOTREACHED("This isn't used. 
Should it be?"); - - nsresult result=NS_OK; - CElement* theHead=GetElement(eHTMLTag_head); - if(theHead) { - result=theHead->OpenContext(aNode,aTag,aContext,aSink); - if(NS_SUCCEEDED(result)) { - result=aSink->SetTitle(mText); - mText.Truncate(0); - if(NS_SUCCEEDED(result)) { - result=theHead->CloseContext(aNode,aTag,aContext,aSink); - } - } - } - return result; - } - - /********************************************************** - Title handles the opening of it's own children - **********************************************************/ - virtual nsresult HandleStartToken(nsIParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - - switch(aTag) { - case eHTMLTag_text: - if(aNode && aNode->GetTokenType()==eToken_entity) { - nsAutoString tmp; - aNode->TranslateToUnicodeStr(tmp); - mText.Append(tmp); - break; - } - case eHTMLTag_whitespace: - mText.Append(aNode->GetText()); - break; - default: - break; - } - - return result; - } - -}; - -/********************************************************** - This class is for the title element - **********************************************************/ -class CTextAreaElement: public CTextContainer { -public: - - CTextAreaElement() : CTextContainer(eHTMLTag_textarea) { - mGroup.mBits.mHeadMisc=1; - mGroup=CFormControlElement::GetGroup(); - mProperties.mIsSinkContainer=0; - } - - virtual nsresult HandleStartToken(nsIParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - - switch(aTag) { - case eHTMLTag_text: - if(aNode && aNode->GetTokenType()==eToken_entity) { - nsAutoString tmp; - aNode->TranslateToUnicodeStr(tmp); - mText.Append(tmp); - break; - } - case eHTMLTag_whitespace: - case eHTMLTag_newline: - mText.Append(aNode->GetText()); - break; - default: - break; - } - - return result; - } - - -}; - -/********************************************************** - This class is for use with style - 
**********************************************************/ -class CStyleElement: public CTextContainer { -public: - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CTextContainer::Initialize(anElement,aTag); - } - - CStyleElement() : CTextContainer(eHTMLTag_style) { - mGroup.mBits.mHeadMisc=1; - } - - /********************************************************** - Call this for each element as it get's closed - **********************************************************/ - virtual nsresult NotifyClose(nsCParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - - // XXXldb This method is completely unused because the |aNode| - // parameter is declared as |nsCParserNode| rather than - // |nsIParserNode| so it doesn't override the member function of - // CTextContainer. - NS_NOTREACHED("This isn't used. Should it be?"); - - nsresult result=NS_OK; - CElement* theHead=GetElement(eHTMLTag_head); - if(theHead) { - result=theHead->OpenContext(aNode,aTag,aContext,aSink); - if(NS_SUCCEEDED(result)) { - result=CTextContainer::NotifyClose(aNode,aTag,aContext,aSink); - mText.Truncate(0); - if(NS_SUCCEEDED(result)) { - result=theHead->CloseContext(aNode,aTag,aContext,aSink); - } - } - } - return result; - } - -}; - -/********************************************************** - This class is for use with script - **********************************************************/ -class CScriptElement: public CTextContainer { -public: - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CTextContainer::Initialize(anElement,aTag); - anElement.mProperties.mIsSinkContainer=PR_FALSE; - } - - CScriptElement() : CTextContainer(eHTMLTag_script) { - mGroup.mBits.mHeadMisc=1; - mGroup.mBits.mInlineEntity=1; - mGroup.mBits.mSpecial=1; - mProperties.mIsSinkContainer=PR_FALSE; - } - - /********************************************************** - this gets called after each tag is opened in the given context - 
**********************************************************/ - virtual nsresult OpenContainerInContext(nsCParserNode *aNode,eHTMLTags aTag,nsDTDContext *aContext,nsIHTMLContentSink *aSink) { - OpenContext(aNode,aTag,aContext,aSink); - return NS_OK; - } - - /********************************************************** - this gets called to close a tag in the given context - **********************************************************/ - virtual nsresult CloseContext(nsIParserNode *aNode,eHTMLTags aTag,nsDTDContext *aContext,nsIHTMLContentSink *aSink) { - - // XXXldb This method is completely unused because the |aNode| - // parameter is declared as |nsIParserNode| rather than - // |nsCParserNode| so it doesn't override the member function of - // CElement. - NS_NOTREACHED("This isn't used. Should it be?"); - - nsEntryStack* theStack=0; - nsIParserNode *theNode=aContext->Pop(theStack); - - CElement *theElement=(aTag==mTag) ? this : GetElement(aTag); - theElement->NotifyClose(theNode,aTag,aContext,aSink); - - return NS_OK; - } - - /********************************************************** - Call this for each element as it get's closed - **********************************************************/ - virtual nsresult NotifyClose(nsCParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - - // XXXldb This method is completely unused because the |aNode| - // parameter is declared as |nsCParserNode| rather than - // |nsIParserNode| so it doesn't override the member function of - // CTextContainer. - NS_NOTREACHED("This isn't used. Should it be?"); - - nsresult result=NS_OK; - - if(aContext->HasOpenContainer(eHTMLTag_body)) { - //add the script to the body - result=CTextContainer::NotifyClose(aNode,aTag,aContext,aSink); - } - else { - //add it to the head... 
- CElement* theHead=GetElement(eHTMLTag_head); - if(theHead) { - result=theHead->OpenContext(aNode,aTag,aContext,aSink); - if(NS_SUCCEEDED(result)) { - result=CTextContainer::NotifyClose(aNode,aTag,aContext,aSink); - if(NS_SUCCEEDED(result)) { - result=theHead->CloseContext(aNode,aTag,aContext,aSink); - } - } - } - } - mText.Truncate(0); - return result; - } - - -}; - -/********************************************************** - This defines the preformatted element group, (PRE). - **********************************************************/ -class CPreformattedElement: public CBlockElement { -public: - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CBlockElement::Initialize(anElement,aTag); - } - - CPreformattedElement(eHTMLTags aTag) : CBlockElement(aTag) { - mGroup=GetGroup(); - mContainsGroups=GetContainedGroups(); - mProperties.mIsContainer=1; - } - - /********************************************************** - Pre handles the opening of it's own children - **********************************************************/ - virtual nsresult HandleStartToken(nsCParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - nsresult result=CElement::HandleStartToken(aNode,aTag,aContext,aSink); - return result; - } - - - /********************************************************** - Pre handles the closing of it's own children - **********************************************************/ - virtual nsresult HandleEndToken(nsCParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - nsresult result=CElement::HandleEndToken(aNode,aTag,aContext,aSink); - return result; - } - -}; - -/********************************************************** - This is used for both applet and object elements - **********************************************************/ -class CAppletElement: public CSpecialElement { -public: - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theGroup={0}; - 
theGroup.mBits.mSpecial=1; - theGroup.mBits.mBlock=1; - return theGroup; - } - - static CGroupMembers& GetContainedGroups(void) { - return CFlowElement::GetContainedGroups(); - } - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::Initialize(anElement,aTag,GetGroup(),GetContainedGroups()); - - static eHTMLTags kSpecialKids[]={eHTMLTag_param,eHTMLTag_unknown}; - anElement.mIncludeKids=kSpecialKids; - anElement.mProperties.mIsContainer=1; - } - - CAppletElement(eHTMLTags aTag) : CSpecialElement(aTag) { - Initialize(*this,aTag); - } - - /********************************************************** - handles the opening of it's own children - **********************************************************/ - virtual nsresult HandleStartToken(nsCParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - nsIParserNode *theNode=aContext->PeekNode(); - if(theNode) { - PRBool theContentsHaveArrived=theNode->GetGenericState(); - switch(aTag) { - case eHTMLTag_param: - if(!theContentsHaveArrived) { - result=CElement::HandleStartToken(aNode,aTag,aContext,aSink); - } - break; - - case eHTMLTag_newline: - case eHTMLTag_whitespace: - result=CElement::HandleStartToken(aNode,aTag,aContext,aSink); - break; - - default: - theNode->SetGenericState(PR_TRUE); - result=CElement::HandleStartToken(aNode,aTag,aContext,aSink); - break; - } //switch - } - return result; - } - -}; - -/********************************************************** - This defines the fieldset element... 
- **********************************************************/ -class CFieldsetElement: public CBlockElement { -public: - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theGroup={0}; - theGroup.mBits.mBlock=1; - return theGroup; - } - - static CGroupMembers& GetContainedGroups(void) { - return CFlowElement::GetContainedGroups(); - } - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::Initialize(anElement,aTag,GetGroup(),GetContainedGroups()); - } - - CFieldsetElement() : CBlockElement(eHTMLTag_fieldset) { - mGroup=GetGroup(); - mContainsGroups=GetContainedGroups(); - mProperties.mIsContainer=1; - } - - /********************************************************** - fieldset handles the opening of it's own children - **********************************************************/ - virtual nsresult HandleStartToken(nsCParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - nsIParserNode *theNode=aContext->PeekNode(); - if(theNode) { - PRBool theLegendExists=theNode->GetGenericState(); - switch(aTag) { - case eHTMLTag_legend: - if(!theLegendExists) { - theNode->SetGenericState(PR_TRUE); - result=OpenContainerInContext(aNode,aTag,aContext,aSink); //force the title onto the stack - } - break; - default: - if(theLegendExists) { - result=CElement::HandleStartToken(aNode,aTag,aContext,aSink); //force the title onto the stack - } - break; - } //switch - } - return result; - } - -}; - -/********************************************************** - This is for FRAMESET, etc. 
- **********************************************************/ -class CTopLevelElement: public CElement { -public: - - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theGroup={0}; - theGroup.mBits.mTopLevel=1; - return theGroup; - } - - static CGroupMembers& GetContainedGroups(void) { - static CGroupMembers theGroup=CFlowElement::GetContainedGroups(); - return theGroup; - } - - - static void Initialize(CElement& anElement,eHTMLTags aTag){ - CElement::Initialize(anElement,aTag,GetGroup(),GetContainedGroups()); - } - - CTopLevelElement(eHTMLTags aTag) : CElement(aTag) { - CTopLevelElement::Initialize(*this,aTag); - } - - - /********************************************************** - Toplevel handles the opening of it's own children - **********************************************************/ - virtual nsresult HandleStartToken( nsCParserNode* aNode, - eHTMLTags aTag, - nsDTDContext* aContext, - nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - - switch(aTag) { - case eHTMLTag_unknown: - default: - result=CElement::HandleStartToken(aNode,aTag,aContext,aSink); - break; - }//switch - - return result; - } - - /********************************************************** - TopLevel handles the opening of it's own children - **********************************************************/ - virtual nsresult HandleEndToken(nsCParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - - nsresult result=NS_OK; - - switch(aTag) { - case eHTMLTag_html: - if(aContext->HasOpenContainer(aTag)) { - result=aSink->CloseHTML(); - CloseContext(aNode,aTag,aContext,aSink); - } - break; - - case eHTMLTag_body: - if(aContext->HasOpenContainer(aTag)) { - result=aSink->CloseBody(); - CloseContext(aNode,aTag,aContext,aSink); - } - break; - - case eHTMLTag_frameset: - if(aContext->HasOpenContainer(aTag)) { - result=aSink->OpenFrameset(*aNode); - CloseContext(aNode,aTag,aContext,aSink); - } - break; - - default: - 
result=CElement::HandleEndToken(aNode,aTag,aContext,aSink); - break; - }//switch - - return result; - } - -}; - - -/********************************************************** - This is for HTML only... - **********************************************************/ -class CHTMLElement: public CTopLevelElement{ -public: - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theBlockGroup={0}; - theBlockGroup.mBits.mTopLevel=1; - return theBlockGroup; - } - - static CGroupMembers& GetContainedGroups(void) { - static CGroupMembers theGroups={0}; - theGroups.mBits.mTopLevel=1; - return theGroups; - } - - CHTMLElement(eHTMLTags aTag) : CTopLevelElement(aTag) { - CElement::Initialize(*this,aTag,CHTMLElement::GetGroup(),CHTMLElement::GetContainedGroups()); - } - - /********************************************************** - HTML handles the opening of it's own children - **********************************************************/ - nsresult HandleDoctypeDecl( nsIParserNode* aNode, - eHTMLTags aTag, - nsDTDContext* aContext, - nsIHTMLContentSink* aSink) { - - nsCParserNode *theNode=(nsCParserNode*)aNode; - nsresult result=NS_OK; - if(theNode) { - nsAutoString theStr(theNode->mToken->GetStringValue()); - PRInt32 theLen=theStr.Length(); - //PRInt32 thePos=theStr.RFindChar(kGreaterThan); - - theStr.Truncate(theLen-1); - theStr.Cut(0,2); - - result = aSink->AddDocTypeDecl(*aNode); - } - return result; - } - - /********************************************************** - HTML handles the opening of it's own children - **********************************************************/ - virtual nsresult HandleStartToken( nsCParserNode* aNode, - eHTMLTags aTag, - nsDTDContext* aContext, - nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - - switch(aTag) { - case eHTMLTag_doctypeDecl: - result=HandleDoctypeDecl(aNode,aTag,aContext,aSink); - break; - - case eHTMLTag_frameset: - result=aSink->OpenFrameset(*aNode); - result=OpenContext(aNode,aTag,aContext,aSink); - 
aContext->mFlags.mHadFrameset=PR_TRUE; - break; - - case eHTMLTag_base: //nothing to do for these empty tags... - case eHTMLTag_isindex: - case eHTMLTag_link: - case eHTMLTag_meta: - { - CElement* theHead=GetElement(eHTMLTag_head); - if(theHead) { - result=theHead->OpenContext(aNode,aTag,aContext,aSink); - if(NS_SUCCEEDED(result)) { - result=aSink->AddLeaf(*aNode); - if(NS_SUCCEEDED(result)) { - result=theHead->CloseContext(aNode,aTag,aContext,aSink); - } - } - } - } - break; - - case eHTMLTag_object: - { - CElement* theHead=GetElement(eHTMLTag_head); - if(theHead) { - result=theHead->OpenContext(aNode,aTag,aContext,aSink); - if(NS_SUCCEEDED(result)) { - result=OpenContainerInContext(aNode,aTag,aContext,aSink); - } - } - } - break; - - case eHTMLTag_script: - case eHTMLTag_style: - case eHTMLTag_title: - result=OpenContext(aNode,aTag,aContext,aSink); //force the title onto the context stack - break; - - case eHTMLTag_newline: - case eHTMLTag_whitespace: - case eHTMLTag_comment: - break; - - - default: - CElement* theBody=GetElement(eHTMLTag_body); - if(theBody) { - CElement *theChildElement=GetElement(aTag); - if(theBody->CanContain(theChildElement,aContext)) { - //let's auto open the body - - CToken* theToken=(CStartToken*)aContext->mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_body); - nsCParserNode* theNode=aContext->mNodeAllocator->CreateNode(theToken, 0); - - result=theBody->HandleStartToken(theNode,eHTMLTag_body,aContext,aSink); - - if(NS_SUCCEEDED(result)) { - if(eHTMLTag_body==aContext->Last()) { - result=theBody->HandleStartToken(aNode,aTag,aContext,aSink); - } - } - } - } - //for now, let's drop other elements onto the floor. 
- break; - }//switch - - return result; - } - - /********************************************************** - HTML handles the closing of it's own children - **********************************************************/ - virtual nsresult HandleEndToken(nsCParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - - switch(aTag) { - case eHTMLTag_body: - aSink->CloseBody(); - result=CloseContext(aNode,aTag,aContext,aSink); - break; - - case eHTMLTag_frameset: - aSink->CloseFrameset(); - result=CloseContext(aNode,aTag,aContext,aSink); - break; - - case eHTMLTag_object: - result=CloseContainerInContext(aNode,aTag,aContext,aSink); - aSink->CloseHead(); - break; - - case eHTMLTag_script: - case eHTMLTag_style: - case eHTMLTag_title: - result=CloseContext(aNode,aTag,aContext,aSink); //close the title - break; - - case eHTMLTag_unknown: - default: - result=CTopLevelElement::HandleEndToken(aNode,aTag,aContext,aSink); - } - return result; - } - -}; - -/********************************************************** - This is for the body element... 
- **********************************************************/ -static const eHTMLTags gBodyKids[] = {eHTMLTag_button, eHTMLTag_del, eHTMLTag_ins, eHTMLTag_map,eHTMLTag_script, eHTMLTag_unknown}; -static const eHTMLTags gBodyExcludeKids[] = {eHTMLTag_applet, eHTMLTag_button, eHTMLTag_iframe, eHTMLTag_object, eHTMLTag_unknown}; - -class CBodyElement: public CElement { -public: - - - static CGroupMembers& GetGroup(void) { - static CGroupMembers theGroup={0}; - theGroup.mBits.mTopLevel=1; - return theGroup; - } - - CBodyElement(eHTMLTags aTag=eHTMLTag_body) : CElement(aTag) { - CGroupMembers theGroups=CBlockElement::GetBlockGroupMembers(); - CElement::Initialize(*this,aTag,CBodyElement::GetGroup(),theGroups); - mIncludeKids=gBodyKids; - mExcludeKids=gBodyExcludeKids; - } - - virtual PRBool CanContain(CElement* anElement,nsDTDContext* aContext) { - PRBool result=CElement::CanContain(anElement,aContext); - if((!result) && (aContext->mFlags.mTransitional)) { - //let's try so additions that are specific to the body tag, - //and only work in transitional mode... - - CGroupMembers& theFlowGroup=CFlowElement::GetContainedGroups(); - result=ContainsGroup(theFlowGroup,anElement->mGroup); - } - return result; - } - - //this gets called after each tag is opened in the given context - virtual nsresult OpenContainer(nsCParserNode *aNode,eHTMLTags aTag,nsDTDContext *aContext,nsIHTMLContentSink *aSink) { - - // XXXldb This method is completely unused because the |aNode| - // parameter is declared as |nsCParserNode| rather than - // |nsIParserNode| so it doesn't override the member function of - // CElement. - NS_NOTREACHED("This isn't used. Should it be?"); - - nsresult result=NS_OK; - if(mTag==aTag) { - // Close the head before opening a body. 
- CElement* theHead=GetElement(eHTMLTag_head); - result=theHead->CloseContext(aNode,aTag,aContext,aSink); - if(NS_SUCCEEDED(result)) { - result=aSink->OpenBody(*aNode); - } - } - else result=CElement::OpenContainer(aNode,aTag,aContext,aSink); - return result; - } - - /********************************************************** - this gets called after each tag is opened in the given context - **********************************************************/ - virtual nsresult OpenContainerInContext(nsCParserNode *aNode,eHTMLTags aTag,nsDTDContext *aContext,nsIHTMLContentSink *aSink) { - NS_ASSERTION(aContext!=nsnull,"need a valid context"); - nsresult result=NS_OK; - // Since BODY is optional, we might come across more than one BODY!. - // That is, one that's auto opened and one that came from the document itself. - // If that's the case then make sure that we don't open up multiple contexts, however, - // don't forget to inform the sink because it needs to account for the BODY attributes. - if(aContext) { - if(!aContext->mFlags.mHadBody) { - result=OpenContext(aNode,aTag,aContext,aSink); - aContext->mFlags.mHadBody=PR_TRUE; - } - } - return (NS_SUCCEEDED(result))? OpenContainer(aNode,aTag,aContext,aSink):result; - } - - /********************************************************** - Body handles the opening of it's own children - **********************************************************/ - virtual nsresult HandleStartToken( nsCParserNode* aNode, - eHTMLTags aTag, - nsDTDContext* aContext, - nsIHTMLContentSink* aSink) { - //for now, let's drop other elements onto the floor. 
- - nsresult result=CElement::HandleStartToken(aNode,aTag,aContext,aSink); - - if(NS_SUCCEEDED(result)) { - if(aNode) { - nsCParserNode* theNode=(nsCParserNode*)aNode; - eHTMLTokenTypes theType=eHTMLTokenTypes(theNode->GetTokenType()); - if(theType==eToken_start) { - CStartToken *theToken=(CStartToken*)theNode->mToken; - if(theToken && theToken->IsEmpty() && (aTag==aContext->Last())){ - result=CElement::HandleEndToken(aNode,aTag,aContext,aSink); - } - } - } - } - - return result; - } - - /********************************************************** - Body doesnt really need to handle it's own kids, but it's - a really convenient break point for debugging purposes. - **********************************************************/ - virtual nsresult HandleEndToken(nsCParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - switch(aTag) { - case eHTMLTag_script: - result=CloseContext(aNode,aTag,aContext,aSink); - break; - default: - result=CElement::HandleEndToken(aNode,aTag,aContext,aSink); - } - return result; - } - - /********************************************************** - Body is the default place where forwarding stops. 
- **********************************************************/ - virtual nsresult HandleEndTokenForChild(CElement *aChild,nsIParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - return result; - } - -protected: - - -}; - - -/************************************************************************ - This describes each group that each HTML element belongs to - ************************************************************************/ -class CElementTable { -public: - enum {eGroupCount=4}; - - CElementTable() : - - mBodyElement(eHTMLTag_body), - mFramesetElement(eHTMLTag_frameset), - mHTMLElement(eHTMLTag_html), - mScriptElement(), - mStyleElement(), - mTitleElement(), - mTextAreaElement(), - mPreElement(eHTMLTag_pre), - mLIElement(eHTMLTag_li), - mAppletElement(eHTMLTag_applet), - mObjectElement(eHTMLTag_object), - mFieldsetElement(), - mCounterElement(), - mFormElement(), - mHeadElement(eHTMLTag_head) - { - memset(mElements,0,sizeof(mElements)); - InitializeElements(); - - //DebugDumpBlockElements("test"); - //DebugDumpInlineElements("test"); - - //DebugDumpContainment("all elements"); - } - - //call this to get a ptr to an element prototype... 
- CElement* GetElement(eHTMLTags aTagID) { - if(aTagID>eHTMLTag_unknown) { - if(aTagID<eHTMLTag_userdefined) { - return mElements[aTagID]; - } - } - return 0; - } - - void InitializeElements(); -#ifdef DEBUG - void DebugDumpGroups(CElement* aParent); - void DebugDumpContainment(const char* aTitle); - void DebugDumpContainment(CElement* aParent); - - void DebugDumpInlineElements(const char* aTitle); - void DebugDumpBlockElements(const char* aTitle); -#endif - - CElement* mElements[150]; //add one here for special handling of a given element - CElement mDfltElements[150]; - - CBodyElement mBodyElement; - CFrameElement mFramesetElement; - CHTMLElement mHTMLElement; - CScriptElement mScriptElement; - CStyleElement mStyleElement; - CTitleElement mTitleElement; - CTextAreaElement mTextAreaElement; - CPreformattedElement mPreElement; - CTableElement mTableElement; - CLIElement mLIElement; - CAppletElement mAppletElement; - CAppletElement mObjectElement; - CFieldsetElement mFieldsetElement; - CCounterElement mCounterElement; - CFormElement mFormElement; - CHeadElement mHeadElement; -}; - - -static CElementTable *gElementTable = 0; - -static const eHTMLTags kDLKids[]={eHTMLTag_dd,eHTMLTag_dt,eHTMLTag_unknown}; -static const eHTMLTags kAutoCloseDD[]={eHTMLTag_dd,eHTMLTag_dt,eHTMLTag_dl,eHTMLTag_unknown}; -static const eHTMLTags kButtonExcludeKids[]={ eHTMLTag_a,eHTMLTag_button,eHTMLTag_select,eHTMLTag_textarea, - eHTMLTag_input,eHTMLTag_iframe,eHTMLTag_form,eHTMLTag_isindex, - eHTMLTag_fieldset,eHTMLTag_unknown}; -static const eHTMLTags kColgroupKids[]={eHTMLTag_col,eHTMLTag_unknown}; -static const eHTMLTags kDirKids[]={eHTMLTag_li,eHTMLTag_unknown}; -static const eHTMLTags kOptionGroupKids[]={eHTMLTag_option,eHTMLTag_unknown}; -static const eHTMLTags kFieldsetKids[]={eHTMLTag_legend,eHTMLTag_unknown}; -static const eHTMLTags kFormKids[]={eHTMLTag_script,eHTMLTag_unknown}; -static const eHTMLTags kLIExcludeKids[]={eHTMLTag_dir,eHTMLTag_menu,eHTMLTag_unknown}; -static const 
eHTMLTags kMapKids[]={eHTMLTag_area,eHTMLTag_unknown}; -static const eHTMLTags kPreExcludeKids[]={eHTMLTag_image,eHTMLTag_object,eHTMLTag_applet, - eHTMLTag_big,eHTMLTag_small,eHTMLTag_sub,eHTMLTag_sup, - eHTMLTag_font,eHTMLTag_basefont,eHTMLTag_unknown}; -static const eHTMLTags kSelectKids[]={eHTMLTag_optgroup,eHTMLTag_option,eHTMLTag_unknown}; -static const eHTMLTags kBlockQuoteKids[]={eHTMLTag_script,eHTMLTag_unknown}; -static const eHTMLTags kFramesetKids[]={eHTMLTag_noframes,eHTMLTag_unknown}; -static const eHTMLTags kObjectKids[]={eHTMLTag_param,eHTMLTag_unknown}; -static const eHTMLTags kTBodyKids[]={eHTMLTag_tr,eHTMLTag_unknown}; -static const eHTMLTags kUnknownKids[]={eHTMLTag_html,eHTMLTag_unknown}; - - -inline CElement* CElement::GetElement(eHTMLTags aTag) { - return gElementTable->mElements[aTag]; -} - - -/*********************************************************************************** - This method is pretty interesting, because it's where the elements all get - initialized for this elementtable. 
- ***********************************************************************************/ -void CElementTable::InitializeElements() { - - int max=sizeof(mElements)/sizeof(mElements[0]); - int index=0; - for(index=0;index<max;++index){ - mElements[index]=&mDfltElements[index]; - } - - CSpecialElement::Initialize( mDfltElements[eHTMLTag_a], eHTMLTag_a); - mDfltElements[eHTMLTag_a].mContainsGroups.mBits.mSelf=0; - - CPhraseElement::Initialize( mDfltElements[eHTMLTag_abbr], eHTMLTag_abbr); - CPhraseElement::Initialize( mDfltElements[eHTMLTag_acronym], eHTMLTag_acronym); - CBlockElement::Initialize( mDfltElements[eHTMLTag_address], eHTMLTag_address); - - CElement::Initialize( mDfltElements[eHTMLTag_applet], eHTMLTag_applet,CSpecialElement::GetGroup(), CFlowElement::GetContainedGroups()); - - CElement::Initialize( mDfltElements[eHTMLTag_area], eHTMLTag_area); - mDfltElements[eHTMLTag_area].mContainsGroups.mBits.mSelf=0; - - CFontStyleElement::Initialize( mDfltElements[eHTMLTag_b], eHTMLTag_b); - CElement::InitializeLeaf( mDfltElements[eHTMLTag_base], eHTMLTag_base, CHeadElement::GetMiscGroup(), CLeafElement::GetContainedGroups()); - - CElement::InitializeLeaf( mDfltElements[eHTMLTag_basefont], eHTMLTag_basefont, CSpecialElement::GetGroup(), CLeafElement::GetContainedGroups()); - - CSpecialElement::Initialize( mDfltElements[eHTMLTag_bdo], eHTMLTag_bdo); - CFontStyleElement::Initialize( mDfltElements[eHTMLTag_big], eHTMLTag_big); - CDeprecatedElement::Initialize( mDfltElements[eHTMLTag_bgsound], eHTMLTag_bgsound); - CElement::Initialize( mDfltElements[eHTMLTag_blockquote], eHTMLTag_blockquote, CBlockElement::GetGroup(), CBlockElement::GetBlockGroupMembers()); - mDfltElements[eHTMLTag_blockquote].mIncludeKids=kBlockQuoteKids; - - //CBodyElement::Initialize( mDfltElements[eHTMLTag_body], eHTMLTag_body); - CElement::InitializeLeaf( mDfltElements[eHTMLTag_br], eHTMLTag_br, CSpecialElement::GetGroup(), CLeafElement::GetContainedGroups()); - - CElement::Initialize( 
mDfltElements[eHTMLTag_button], eHTMLTag_button, CFormControlElement::GetGroup(), CFlowElement::GetContainedGroups()); - mDfltElements[eHTMLTag_button].mGroup.mBits.mBlock=1; //make this a member of the block group. - mDfltElements[eHTMLTag_button].mExcludeKids=kButtonExcludeKids; - - - CElement::Initialize( mDfltElements[eHTMLTag_caption], eHTMLTag_caption, CTableElement::GetGroup(), CSpecialElement::GetContainedGroups()); - mDfltElements[eHTMLTag_tr].mContainsGroups.mBits.mSelf=0; - - CElement::Initialize( mDfltElements[eHTMLTag_center], eHTMLTag_center, CBlockElement::GetGroup(), CFlowElement::GetContainedGroups()); - - CPhraseElement::Initialize( mDfltElements[eHTMLTag_cite], eHTMLTag_cite); - CPhraseElement::Initialize( mDfltElements[eHTMLTag_code], eHTMLTag_code); - CElement::Initialize( mDfltElements[eHTMLTag_col], eHTMLTag_col, CTableElement::GetGroup(), CLeafElement::GetContainedGroups()); - mDfltElements[eHTMLTag_col].mProperties.mIsContainer=0; - - CTableElement::Initialize( mDfltElements[eHTMLTag_colgroup], eHTMLTag_colgroup); - mDfltElements[eHTMLTag_colgroup].mContainsGroups.mAllBits=0; - mDfltElements[eHTMLTag_colgroup].mIncludeKids=kColgroupKids; - - CElement::Initialize( mDfltElements[eHTMLTag_counter], eHTMLTag_counter); - - CElement::Initialize( mDfltElements[eHTMLTag_dd], eHTMLTag_dd, CElement::GetEmptyGroup(), CFlowElement::GetContainedGroups()); - mDfltElements[eHTMLTag_dd].mAutoClose=kAutoCloseDD; - mDfltElements[eHTMLTag_dd].mContainsGroups.mBits.mSelf=0; - - CElement::Initialize( mDfltElements[eHTMLTag_del], eHTMLTag_del, CPhraseElement::GetGroup(), CFlowElement::GetContainedGroups()); - mDfltElements[eHTMLTag_del].mGroup.mBits.mBlock=1; //make this a member of the block group. 
- - CElement::Initialize( mDfltElements[eHTMLTag_dfn], eHTMLTag_dfn, CPhraseElement::GetGroup(), CInlineElement::GetContainedGroups()); - CBlockElement::Initialize( mDfltElements[eHTMLTag_dir], eHTMLTag_dir); - mDfltElements[eHTMLTag_dir].mGroup.mBits.mList=1; - mDfltElements[eHTMLTag_dir].mIncludeKids=kDirKids; - mDfltElements[eHTMLTag_dir].mContainsGroups.mAllBits=0; - - CElement::Initialize( mDfltElements[eHTMLTag_div], eHTMLTag_div, CBlockElement::GetGroup(), CFlowElement::GetContainedGroups()); - - CBlockElement::Initialize( mDfltElements[eHTMLTag_dl], eHTMLTag_dl); - mDfltElements[eHTMLTag_dl].mContainsGroups.mAllBits=0; - mDfltElements[eHTMLTag_dl].mIncludeKids=kDLKids; - - CElement::Initialize( mDfltElements[eHTMLTag_dt], eHTMLTag_dt, CElement::GetEmptyGroup(), CInlineElement::GetContainedGroups()); - mDfltElements[eHTMLTag_dt].mContainsGroups.mBits.mLeaf=1; - mDfltElements[eHTMLTag_dt].mAutoClose=kAutoCloseDD; - - CPhraseElement::Initialize( mDfltElements[eHTMLTag_em], eHTMLTag_em); - CElement::Initialize( mDfltElements[eHTMLTag_embed], eHTMLTag_embed); - CBlockElement::Initialize( mDfltElements[eHTMLTag_endnote], eHTMLTag_endnote); - - CElement::Initialize( mDfltElements[eHTMLTag_fieldset], eHTMLTag_fieldset, CBlockElement::GetGroup(), CFlowElement::GetContainedGroups()); - mDfltElements[eHTMLTag_fieldset].mIncludeKids=kFieldsetKids; - - CSpecialElement::Initialize( mDfltElements[eHTMLTag_font], eHTMLTag_font); - CElement::Initialize( mDfltElements[eHTMLTag_form], eHTMLTag_form, CBlockElement::GetGroup(), CBlockElement::GetBlockGroupMembers()); - mDfltElements[eHTMLTag_form].mContainsGroups.mBits.mFormControl=1; - mDfltElements[eHTMLTag_form].mIncludeKids=kFormKids; - - CElement::Initialize( mDfltElements[eHTMLTag_frame], eHTMLTag_frame, CFrameElement::GetGroup(), CLeafElement::GetContainedGroups()); - mDfltElements[eHTMLTag_frame].mProperties.mIsContainer=0; - - CFrameElement::Initialize( mDfltElements[eHTMLTag_frameset], eHTMLTag_frameset); - 
mDfltElements[eHTMLTag_frameset].mIncludeKids=kFramesetKids; - - CElement::Initialize( mDfltElements[eHTMLTag_h1], eHTMLTag_h1, CBlockElement::GetGroup(), CBlockElement::GetContainedGroups(PR_FALSE)); - CElement::Initialize( mDfltElements[eHTMLTag_h2], eHTMLTag_h2, CBlockElement::GetGroup(), CBlockElement::GetContainedGroups(PR_FALSE)); - CElement::Initialize( mDfltElements[eHTMLTag_h3], eHTMLTag_h3, CBlockElement::GetGroup(), CBlockElement::GetContainedGroups(PR_FALSE)); - CElement::Initialize( mDfltElements[eHTMLTag_h4], eHTMLTag_h4, CBlockElement::GetGroup(), CBlockElement::GetContainedGroups(PR_FALSE)); - CElement::Initialize( mDfltElements[eHTMLTag_h5], eHTMLTag_h5, CBlockElement::GetGroup(), CBlockElement::GetContainedGroups(PR_FALSE)); - CElement::Initialize( mDfltElements[eHTMLTag_h6], eHTMLTag_h6, CBlockElement::GetGroup(), CBlockElement::GetContainedGroups(PR_FALSE)); - - CElement::InitializeLeaf( mDfltElements[eHTMLTag_hr], eHTMLTag_hr, CBlockElement::GetGroup(), CLeafElement::GetContainedGroups()); - - - CElement::Initialize( mDfltElements[eHTMLTag_head], eHTMLTag_head, CHeadElement::GetGroup(), CHeadElement::GetContainedGroups()); - // InitializeElement( mDfltElements[eHTMLTag_head], eHTMLTag_html, CTopLevelElement::GetGroup(), CTopLevelElement::GetContainedGroups()); - - CFontStyleElement::Initialize( mDfltElements[eHTMLTag_i], eHTMLTag_i); - CElement::Initialize( mDfltElements[eHTMLTag_iframe], eHTMLTag_iframe, CSpecialElement::GetGroup(), CFlowElement::GetContainedGroups()); - mDfltElements[eHTMLTag_iframe].mGroup.mBits.mBlock=1; //make this a member of the block group. 
- - CElement::InitializeLeaf( mDfltElements[eHTMLTag_img], eHTMLTag_img, CSpecialElement::GetGroup(), CLeafElement::GetContainedGroups()); - CElement::Initialize( mDfltElements[eHTMLTag_image], eHTMLTag_image); - CElement::InitializeLeaf( mDfltElements[eHTMLTag_input], eHTMLTag_input, CFormControlElement::GetGroup(),CLeafElement::GetContainedGroups()); - CElement::Initialize( mDfltElements[eHTMLTag_ins], eHTMLTag_ins, CPhraseElement::GetGroup(), CFlowElement::GetContainedGroups()); - mDfltElements[eHTMLTag_ins].mGroup.mBits.mBlock=1; //make this a member of the block group. - - CElement::InitializeLeaf( mDfltElements[eHTMLTag_isindex], eHTMLTag_isindex, CBlockElement::GetGroup(), CLeafElement::GetContainedGroups()); - - CPhraseElement::Initialize( mDfltElements[eHTMLTag_kbd], eHTMLTag_kbd); - CDeprecatedElement::Initialize( mDfltElements[eHTMLTag_keygen], eHTMLTag_keygen); - - CElement::Initialize( mDfltElements[eHTMLTag_label], eHTMLTag_label, CFormControlElement::GetGroup(), CInlineElement::GetContainedGroups()); - mDfltElements[eHTMLTag_label].mContainsGroups.mBits.mSelf=0; - - CElement::Initialize( mDfltElements[eHTMLTag_legend], eHTMLTag_legend, CElement::GetEmptyGroup(), CInlineElement::GetContainedGroups()); - CElement::Initialize( mDfltElements[eHTMLTag_li], eHTMLTag_li, CListElement::GetGroup(), CFlowElement::GetContainedGroups()); - mDfltElements[eHTMLTag_li].mExcludeKids=kLIExcludeKids; - mDfltElements[eHTMLTag_li].mContainsGroups.mBits.mSelf=0; - - CElement::InitializeLeaf( mDfltElements[eHTMLTag_link], eHTMLTag_link, CHeadElement::GetMiscGroup(), CLeafElement::GetContainedGroups()); - CElement::Initialize( mDfltElements[eHTMLTag_listing], eHTMLTag_listing); - - CElement::Initialize( mDfltElements[eHTMLTag_map], eHTMLTag_map, CSpecialElement::GetGroup(), CBlockElement::GetBlockGroupMembers()); - mDfltElements[eHTMLTag_map].mIncludeKids=kMapKids; - - CBlockElement::Initialize( mDfltElements[eHTMLTag_menu], eHTMLTag_menu); - 
mDfltElements[eHTMLTag_menu].mGroup.mBits.mList=1; - mDfltElements[eHTMLTag_menu].mIncludeKids=kDirKids; - mDfltElements[eHTMLTag_menu].mContainsGroups.mAllBits=0; - - CElement::InitializeLeaf( mDfltElements[eHTMLTag_meta], eHTMLTag_meta, CHeadElement::GetMiscGroup(), CLeafElement::GetContainedGroups()); - - CElement::Initialize( mDfltElements[eHTMLTag_multicol], eHTMLTag_multicol); - CElement::Initialize( mDfltElements[eHTMLTag_nobr], eHTMLTag_nobr); - CElement::Initialize( mDfltElements[eHTMLTag_noembed], eHTMLTag_noembed); - - CElement::Initialize( mDfltElements[eHTMLTag_noframes], eHTMLTag_noframes, CBlockElement::GetGroup(), CFlowElement::GetContainedGroups()); - CElement::Initialize( mDfltElements[eHTMLTag_noscript], eHTMLTag_noscript, CBlockElement::GetGroup(), CFlowElement::GetContainedGroups()); - - CElement::Initialize( mDfltElements[eHTMLTag_object], eHTMLTag_object, CBlockElement::GetGroup(), CFlowElement::GetContainedGroups()); - mDfltElements[eHTMLTag_object].mGroup.mBits.mBlock=1; //make this a member of the block group. 
- mDfltElements[eHTMLTag_object].mGroup.mBits.mHeadMisc=1; - mDfltElements[eHTMLTag_object].mIncludeKids=kObjectKids; - - CBlockElement::Initialize( mDfltElements[eHTMLTag_ol], eHTMLTag_ol); - mDfltElements[eHTMLTag_ol].mGroup.mBits.mList=1; - mDfltElements[eHTMLTag_ol].mIncludeKids=kDirKids; - mDfltElements[eHTMLTag_ol].mContainsGroups.mAllBits=0; - - CElement::Initialize( mDfltElements[eHTMLTag_optgroup], eHTMLTag_optgroup, CElement::GetEmptyGroup(), CElement::GetEmptyGroup()); - mDfltElements[eHTMLTag_optgroup].mContainsGroups.mAllBits=0; - mDfltElements[eHTMLTag_optgroup].mIncludeKids=kOptionGroupKids; - - CElement::Initialize( mDfltElements[eHTMLTag_option], eHTMLTag_option, CElement::GetEmptyGroup(), CElement::GetEmptyGroup()); - mDfltElements[eHTMLTag_option].mContainsGroups.mAllBits=0; - mDfltElements[eHTMLTag_option].mContainsGroups.mBits.mLeaf=1; - - CElement::Initialize( mDfltElements[eHTMLTag_p], eHTMLTag_p, CBlockElement::GetGroup(), CInlineElement::GetContainedGroups()); - mDfltElements[eHTMLTag_p].mContainsGroups.mBits.mSelf=0; - - CElement::InitializeLeaf( mDfltElements[eHTMLTag_param], eHTMLTag_param, CElement::GetEmptyGroup(), CLeafElement::GetContainedGroups()); - CBlockElement::Initialize( mDfltElements[eHTMLTag_parsererror],eHTMLTag_parsererror); - CElement::Initialize( mDfltElements[eHTMLTag_plaintext], eHTMLTag_plaintext); - CBlockElement::Initialize( mDfltElements[eHTMLTag_pre], eHTMLTag_pre); - mDfltElements[eHTMLTag_pre].mExcludeKids=kPreExcludeKids; - - CElement::Initialize( mDfltElements[eHTMLTag_plaintext], eHTMLTag_plaintext); - CSpecialElement::Initialize( mDfltElements[eHTMLTag_q], eHTMLTag_q); - - CFontStyleElement::Initialize( mDfltElements[eHTMLTag_s], eHTMLTag_s); - CPhraseElement::Initialize( mDfltElements[eHTMLTag_samp], eHTMLTag_samp ); - CSpecialElement::Initialize( mDfltElements[eHTMLTag_script], eHTMLTag_script); - mDfltElements[eHTMLTag_script].mGroup.mBits.mBlock=1; //make this a member of the block group. 
- mDfltElements[eHTMLTag_script].mGroup.mBits.mHeadMisc=1; - - CFormControlElement::Initialize( mDfltElements[eHTMLTag_select], eHTMLTag_select); - mDfltElements[eHTMLTag_select].mContainsGroups.mAllBits=0; - mDfltElements[eHTMLTag_select].mIncludeKids=kSelectKids; - - CElement::Initialize( mDfltElements[eHTMLTag_server], eHTMLTag_server); - CFontStyleElement::Initialize( mDfltElements[eHTMLTag_small], eHTMLTag_small); - CElement::Initialize( mDfltElements[eHTMLTag_sourcetext], eHTMLTag_sourcetext); - CElement::Initialize( mDfltElements[eHTMLTag_spacer], eHTMLTag_spacer); - CSpecialElement::Initialize( mDfltElements[eHTMLTag_span], eHTMLTag_span); - CFontStyleElement::Initialize( mDfltElements[eHTMLTag_strike], eHTMLTag_strike); - CPhraseElement::Initialize( mDfltElements[eHTMLTag_strong], eHTMLTag_strong); - CHeadElement::Initialize( mDfltElements[eHTMLTag_style], eHTMLTag_style); - CSpecialElement::Initialize( mDfltElements[eHTMLTag_sub], eHTMLTag_sub); - CSpecialElement::Initialize( mDfltElements[eHTMLTag_sup], eHTMLTag_sup); - - CElement::Initialize( mDfltElements[eHTMLTag_table], eHTMLTag_table, CBlockElement::GetGroup(), CTableElement::GetContainedGroups()); - CElement::Initialize( mDfltElements[eHTMLTag_tbody], eHTMLTag_tbody, CTableElement::GetGroup(), CLeafElement::GetContainedGroups()); - mDfltElements[eHTMLTag_tbody].mIncludeKids=kTBodyKids; - - CElement::Initialize( mDfltElements[eHTMLTag_td], eHTMLTag_td, CElement::GetEmptyGroup(), CFlowElement::GetContainedGroups()); - mDfltElements[eHTMLTag_td].mContainsGroups.mBits.mSelf=0; - - CElement::Initialize( mDfltElements[eHTMLTag_textarea], eHTMLTag_textarea); - - CElement::Initialize( mDfltElements[eHTMLTag_tfoot], eHTMLTag_tfoot, CTableElement::GetGroup(), CLeafElement::GetContainedGroups()); - mDfltElements[eHTMLTag_tfoot].mIncludeKids=kTBodyKids; - mDfltElements[eHTMLTag_tfoot].mContainsGroups.mBits.mSelf=0; - - CElement::Initialize( mDfltElements[eHTMLTag_th], eHTMLTag_th, CElement::GetEmptyGroup(), 
CFlowElement::GetContainedGroups()); - mDfltElements[eHTMLTag_th].mContainsGroups.mBits.mSelf=0; - - CElement::Initialize( mDfltElements[eHTMLTag_thead], eHTMLTag_thead, CTableElement::GetGroup(), CLeafElement::GetContainedGroups()); - mDfltElements[eHTMLTag_thead].mIncludeKids=kTBodyKids; - - CTableRowElement::Initialize( mDfltElements[eHTMLTag_tr], eHTMLTag_tr); - mDfltElements[eHTMLTag_tr].mContainsGroups.mBits.mSelf=0; - - CElement::Initialize( mDfltElements[eHTMLTag_title], eHTMLTag_title); - - CFontStyleElement::Initialize( mDfltElements[eHTMLTag_tt], eHTMLTag_tt); - CFontStyleElement::Initialize( mDfltElements[eHTMLTag_u], eHTMLTag_u); - CBlockElement::Initialize( mDfltElements[eHTMLTag_ul], eHTMLTag_ul); - mDfltElements[eHTMLTag_ul].mGroup.mBits.mList=1; - mDfltElements[eHTMLTag_ul].mIncludeKids=kDirKids; - mDfltElements[eHTMLTag_ul].mContainsGroups.mAllBits=0; - - CPhraseElement::Initialize( mDfltElements[eHTMLTag_var], eHTMLTag_var); - CElement::Initialize( mDfltElements[eHTMLTag_wbr], eHTMLTag_wbr); - CElement::Initialize( mDfltElements[eHTMLTag_xmp], eHTMLTag_xmp); - - CLeafElement::Initialize( mDfltElements[eHTMLTag_text], eHTMLTag_text); - CLeafElement::Initialize( mDfltElements[eHTMLTag_comment], eHTMLTag_comment); - CLeafElement::Initialize( mDfltElements[eHTMLTag_newline], eHTMLTag_newline); - CLeafElement::Initialize( mDfltElements[eHTMLTag_whitespace],eHTMLTag_whitespace); - CLeafElement::Initialize( mDfltElements[eHTMLTag_unknown], eHTMLTag_unknown); - - CElement::Initialize(mDfltElements[eHTMLTag_userdefined], - eHTMLTag_userdefined, - CElement::GetEmptyGroup(), - CFlowElement::GetContainedGroups()); // allow userdefined tag to contain anything. - - mDfltElements[eHTMLTag_unknown].mIncludeKids=kUnknownKids; - - - /************************************************************ - Now let's initialize the elements that we created directly - to handle special cases. 
- ************************************************************/ - - mElements[eHTMLTag_body]=&mBodyElement; - mElements[eHTMLTag_frameset]=&mFramesetElement; - mElements[eHTMLTag_html]=&mHTMLElement; - mElements[eHTMLTag_script]=&mScriptElement; - mElements[eHTMLTag_style]=&mStyleElement; - mElements[eHTMLTag_title]=&mTitleElement; - mElements[eHTMLTag_textarea]=&mTextAreaElement; - mElements[eHTMLTag_pre]=&mPreElement; - mElements[eHTMLTag_table]=&mTableElement; - mElements[eHTMLTag_li]=&mLIElement; - mElements[eHTMLTag_applet]=&mAppletElement; - mElements[eHTMLTag_object]=&mObjectElement; - mElements[eHTMLTag_fieldset]=&mFieldsetElement; - mElements[eHTMLTag_counter]=&mCounterElement; - mElements[eHTMLTag_form]=&mFormElement; - mElements[eHTMLTag_head]=&mHeadElement; -} - -#ifdef DEBUG -void CElementTable::DebugDumpGroups(CElement* aTag){ - - const PRUnichar* uctag=nsHTMLTags::GetStringValue(aTag->mTag); - - const char* prefix=" "; - printf("\n\nTag: <%s>\n", NS_ConvertUCS2toUTF8(uctag).get()); - printf(prefix); - - if(aTag->IsContainer()) { - if(aTag->mContainsGroups.mBits.mHead) printf("head "); - if(aTag->mContainsGroups.mBits.mHeadMisc) printf("headmisc "); - if(aTag->mContainsGroups.mBits.mHeadContent) printf("headcontent "); - if(aTag->mContainsGroups.mBits.mTable) printf("table "); - if(aTag->mContainsGroups.mBits.mTextContainer) printf("text "); - if(aTag->mContainsGroups.mBits.mTopLevel) printf("toplevel "); - if(aTag->mContainsGroups.mBits.mDTDInternal) printf("internal "); - - if(aTag->mContainsGroups.mBits.mFlowEntity) { - printf("block inline "); - } - else { - - if (aTag->mContainsGroups.mBits.mBlockEntity) { - printf("blockEntity "); - } - - if (aTag->mContainsGroups.mBits.mBlock) { - printf("block "); - } - - if(aTag->mContainsGroups.mBits.mInlineEntity) { - printf("inline "); - } - - else { - - if(aTag->mContainsGroups.mBits.mFontStyle ) printf("fontstyle "); - if(aTag->mContainsGroups.mBits.mPhrase) printf("phrase "); - 
if(aTag->mContainsGroups.mBits.mSpecial) printf("special "); - if(aTag->mContainsGroups.mBits.mFormControl) printf("form "); - if(aTag->mContainsGroups.mBits.mHeading) printf("heading "); - if(aTag->mContainsGroups.mBits.mFrame) printf("frame "); - if(aTag->mContainsGroups.mBits.mList) printf("list "); - if(aTag->mContainsGroups.mBits.mPreformatted) printf("pre "); - if(aTag->mContainsGroups.mBits.mSelf) printf("self "); - if(aTag->mContainsGroups.mBits.mLeaf) printf("leaf "); - if(aTag->mContainsGroups.mBits.mWhiteSpace) printf("ws "); - if(aTag->mContainsGroups.mBits.mComment) printf("comment "); - } - - } - - if(aTag->mIncludeKids) { - printf("\n%s",prefix); - const eHTMLTags *theKid=aTag->mIncludeKids; - printf("+ "); - while(eHTMLTag_unknown!=*theKid){ - const PRUnichar *t = nsHTMLTags::GetStringValue(*theKid++); - printf("%s ", NS_ConvertUCS2toUTF8(t).get()); - } - } - - if(aTag->mExcludeKids) { - printf("\n%s",prefix); - const eHTMLTags *theKid=aTag->mExcludeKids; - printf("- "); - while(eHTMLTag_unknown!=*theKid){ - const PRUnichar *t = nsHTMLTags::GetStringValue(*theKid++); - printf("%s ", NS_ConvertUCS2toUTF8(t).get()); - } - } - - if(!aTag->mContainsGroups.mBits.mSelf){ - printf("\n%s - self",prefix); - } - - } - else { - printf("empty\n"); - } -} - -void CElementTable::DebugDumpContainment(CElement* anElement){ - const PRUnichar *uctag = nsHTMLTags::GetStringValue(anElement->mTag); - const char* prefix=" "; - printf("\n\nTag: <%s>\n", NS_ConvertUCS2toUTF8(uctag).get()); - printf(prefix); - - int count=0; - int i=0; - for(i=0;i<NS_HTML_TAG_MAX;++i){ - CElement* theChild=mElements[i]; - if(anElement->CanContain(theChild,0)){ - const PRUnichar *t = nsHTMLTags::GetStringValue(theChild->mTag); - printf("%s ", NS_ConvertUCS2toUTF8(t).get()); - ++count; - if(18==count) { - count=0; - printf("\n%s",prefix); - } - } - } -} - -void CElementTable::DebugDumpInlineElements(const char* aTitle) { - PRInt32 theTagID=eHTMLTag_unknown; - PRBool result=PR_FALSE; - - 
printf("Inline Elements -- %s: \n",aTitle); - while(theTagID<=eHTMLTag_userdefined) { - CElement *theTag=GetElement((eHTMLTags)theTagID); - if(theTag) { - result=theTag->IsInlineElement(eHTMLTag_unknown); - if(result) { - const PRUnichar *t = nsHTMLTags::GetStringValue(theTag->mTag); - printf(" %s\n", NS_ConvertUCS2toUTF8(t).get()); - } - } - theTagID++; - } -} - -void CElementTable::DebugDumpBlockElements(const char* aTitle) { - PRInt32 theTagID=eHTMLTag_unknown; - PRBool result=PR_FALSE; - - printf("Block Elements -- %s: \n",aTitle); - while(theTagID<=eHTMLTag_userdefined) { - CElement *theTag=GetElement((eHTMLTags)theTagID); - if(theTag) { - result=theTag->IsBlockElement(eHTMLTag_unknown); - if(result) { - const PRUnichar *theName = nsHTMLTags::GetStringValue(theTag->mTag); - printf(" %s\n", NS_ConvertUCS2toUTF8(theName).get()); - } - } - theTagID++; - } -} - -void CElementTable::DebugDumpContainment(const char* aTitle){ -#if 0 - DebugDumpContainment(mElements[eHTMLTag_head]); - DebugDumpContainment(mElements[eHTMLTag_html]); - DebugDumpContainment(mElements[eHTMLTag_table]); - printf("\n"); -#endif - - printf("==================================================\n"); - printf("%s\n",aTitle); - printf("==================================================\n"); - int i=0; - - for(i=1;i<NS_HTML_TAG_MAX;++i){ - DebugDumpContainment(mElements[i]); - //DebugDumpGroups(mElements[i]); - } //for -} -#endif - -/****************************************************************************** - Yes, I know it's inconvenient to find this methods here, but it's easier - for the compiler -- and making it's life easier is my top priority. 
- ******************************************************************************/ -PRInt32 CElement::FindAutoCloseIndexForStartTag(CElement* anElement,PRInt32 aParentIndex,nsDTDContext* aContext) { - PRInt32 result=kNotFound; - - if(anElement) { - eHTMLTags theParentTag=aContext->TagAt(aParentIndex); - if(eHTMLTag_unknown!=theParentTag) { - - CElement* theParent=gElementTable->mElements[theParentTag]; - - if(!theParent->CanContain(anElement,aContext)) { - if(HasOptionalEndTag(theParentTag)) { - - if(ListContainsTag(theParent->mAutoClose,anElement->mTag)) { - result=theParent->FindAutoCloseIndexForStartTag(anElement,aParentIndex-1,aContext); - } - else if((theParent->mTag==anElement->mTag) && (!theParent->mContainsGroups.mBits.mSelf)){ - result=aParentIndex; - } - else if(eHTMLTag_body!=theParent->mTag) { - result=theParent->FindAutoCloseIndexForStartTag(anElement,aParentIndex-1,aContext); - } - else result=aParentIndex+1; - } - else result=kNotFound; - } - else result=aParentIndex+1; - } - } - - return result; -} - -/****************************************************************************** - Yes, I know it's inconvenient to find this methods here, but it's easier - for the compiler -- and making it's life easier is my top priority. - ******************************************************************************/ -PRBool CElement::CanBeClosedByEndTag(CElement* anElement,nsDTDContext* aContext) { - PRBool result=PR_FALSE; - - //first, let's see if we can contain the given tag based on group info... 
- if(anElement) { - if(ListContainsTag(mAutoClose,anElement->mTag)) { - return PR_TRUE; - } - else if((this==anElement) && (!mContainsGroups.mBits.mSelf)){ - return PR_TRUE; - } - else { - eHTMLTags theTag=aContext->Last(); - CElement* theElement=gElementTable->mElements[theTag]; - if(HasOptionalEndTag(theTag)) { - if(anElement->CanContain(theElement,aContext)){ - result=PR_TRUE; - } - } - } - } - return result; -} - -/****************************************************************************** - Yes, I know it's inconvenient to find this methods here, but it's easier - for the compiler -- and making it's life easier is my top priority. - ******************************************************************************/ -PRBool CElement::CanContain(CElement* anElement,nsDTDContext* aContext) { - PRBool result=PR_FALSE; - - //first, let's see if we can contain the given tag based on group info... - if(anElement) { - if(!anElement->mProperties.mDeprecated) { - if(anElement!=this) { - if(ListContainsTag(mExcludeKids,anElement->mTag)) { - return PR_FALSE; - } - else if(ContainsGroup(mContainsGroups,anElement->mGroup)) { - result=PR_TRUE; - } - else if(ListContainsTag(mIncludeKids,anElement->mTag)) { - return PR_TRUE; - } - } - else result=mContainsGroups.mBits.mSelf; - } - - /*************************************************** - This is a (cheesy) exception table, that allows - us to override containment for transitional - documents. A better implementation would be to - create unique classes for each of the tags in - this table, and to override CanContain() there. 
- ***************************************************/ - - if((!result) && (aContext->mFlags.mTransitional)) { - switch(mTag) { - case eHTMLTag_address: - if(eHTMLTag_p==anElement->mTag) - result=PR_TRUE; - break; - - case eHTMLTag_blockquote: - case eHTMLTag_form: - case eHTMLTag_iframe: - result=ContainsGroup(CFlowElement::GetContainedGroups(),anElement->mGroup); - break; - - case eHTMLTag_button: - if((eHTMLTag_iframe==anElement->mTag) || (eHTMLTag_isindex==anElement->mTag)) - result=PR_TRUE; - break; - - default: - break; - } - } - } - return result; -} - -nsresult CElement::WillHandleStartToken( CElement *anElement, - nsIParserNode* aNode, - eHTMLTags aTag, - nsDTDContext* aContext, - nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - return result; -} - -nsresult CElement::HandleStartToken( nsCParserNode* aNode, - eHTMLTags aTag, - nsDTDContext* aContext, - nsIHTMLContentSink* aSink) { - - CElement* theElement=gElementTable->mElements[aTag]; - - nsresult result=WillHandleStartToken(theElement,aNode,aTag,aContext,aSink); - -#if 0 - CElement* theDelegate=theElement->GetDelegate(); - if(theDelegate) { - result=theDelegate->HandleStartToken(aNode,aTag,aContext,aSink); - } - else -#endif - - { - if(theElement) { - if(CanContain(theElement,aContext)) { - - if(theElement->IsContainer()) { - if(theElement->IsSinkContainer()) { - result=theElement->OpenContainerInContext(aNode,aTag,aContext,aSink); - } - else { - result=theElement->OpenContext(aNode,aTag,aContext,aSink); - } - } - else { - result=aSink->AddLeaf(*aNode); - } - } - else if(theElement->IsBlockCloser()){ - - //Ok, so we have a start token that is misplaced. Before handing this off - //to a default container (parent), let's check the autoclose condition. - if(HasOptionalEndTag(mTag)) { - - //aha! We have a case where this tag is autoclosed by anElement. - //Let's close this container, then try to open theElement. 
- PRInt32 theCount=aContext->GetCount(); - PRInt32 theIndex=FindAutoCloseIndexForStartTag(theElement,theCount-2,aContext); - - //continue ripping code out here... - - if(kNotFound!=theIndex) { - eHTMLTags theParentTag=eHTMLTag_unknown; - CElement* theParent=0; - - while(NS_SUCCEEDED(result) && (theCount>theIndex)) { - - theParentTag=aContext->Last(); - theParent=gElementTable->mElements[theParentTag]; - - nsCParserNode *theNode=aContext->PeekNode(); //this will get popped later... - if(theParent->IsSinkContainer()) { - CloseContainerInContext(theNode,theParentTag,aContext,aSink); - } - else CloseContext(theNode,theParentTag,aContext,aSink); - theCount--; - } - - if(NS_SUCCEEDED(result)){ - theParentTag=aContext->Last(); - theParent=gElementTable->mElements[theParentTag]; - result=theParent->HandleStartToken(aNode,aTag,aContext,aSink); - } - } - return result; - } - else { - - PRBool theElementCanOpen=PR_FALSE; - - //the logic here is simple: - // This operation can only succeed if the given tag is open, AND - // all the tags below it have optional end tags. - // If these conditions aren't met, we bail out, leaving the tag open. - - if(mTag!=aTag) { - PRInt32 theLastPos=aContext->LastOf(aTag); //see if it's already open... - if(-1!=theLastPos) { - PRInt32 theCount=aContext->GetCount(); - result=HandleEndToken(aNode,aTag,aContext,aSink); - theElementCanOpen=PRBool(aContext->GetCount()<theCount); - } - } - - if(theElementCanOpen) { - if(NS_SUCCEEDED(result)){ - eHTMLTags theParentTag=aContext->Last(); - CElement* theParent=gElementTable->mElements[theParentTag]; - return theParent->HandleStartToken(aNode,aTag,aContext,aSink); - } - } - } - - //ok, here's our last recourse -- let's let the parent handle it. 
- CElement* theContainer=GetDefaultContainerFor(theElement); - if(theContainer) { - result=theContainer->HandleMisplacedStartToken(aNode,aTag,aContext,aSink); - } - } - } - } - return result; -} - - -nsresult CElement::HandleEndToken(nsCParserNode* aNode,eHTMLTags aTag,nsDTDContext* aContext,nsIHTMLContentSink* aSink) { - nsresult result=NS_OK; - - if(aContext->Last()==aTag) { - CElement* theElement=gElementTable->mElements[aTag]; - if(theElement) { - if(theElement->IsSinkContainer()) { - result=CloseContainerInContext(aNode,aTag,aContext,aSink); - } - else result=CloseContext(aNode,aTag,aContext,aSink); - return result; - } - } - - PRInt32 theCount=aContext->GetCount(); - PRInt32 theIndex=theCount-1; - - PRInt32 theCloseTarget=FindAutoCloseTargetForEndTag(aNode,aTag,aContext,aSink,theIndex); - - if(-1!=theCloseTarget) { - while(theCloseTarget<theCount) { - eHTMLTags theTag=aContext->Last(); - eHTMLTags theGrandParentTag=aContext->TagAt(theCount-2); - CElement *theGrandParent=GetElement(theGrandParentTag); - result=theGrandParent->HandleEndToken(aNode,theTag,aContext,aSink); - theCount--; - } - //return result; - } - return result; -} - - -inline CElement* CElement::GetDelegate(void) { - if(eHTMLTag_unknown!=mDelegate) { - return gElementTable->mElements[mDelegate]; - } - return 0; -} - -inline CElement* CElement::GetDefaultContainerFor(CElement* anElement) { - CElement* result=0; - - if(anElement) { - if(anElement->mGroup.mBits.mBlock) { - result=gElementTable->mElements[eHTMLTag_body]; - } - else if(anElement->mGroup.mBits.mHeadContent) { - result=gElementTable->mElements[eHTMLTag_head]; - } - else if(anElement->mGroup.mBits.mHeadMisc) { - result=gElementTable->mElements[eHTMLTag_head]; - } - } - return result; -} - - - //this tells us whether this tag is a block tag within the given parent - //NOTE: aParentID is currently ignored, but shouldn't be. 
-PRBool CElement::IsBlockElement(eHTMLTags aParentID) { - CGroupMembers& theBlockGroup=CBlockElement::GetBlockGroupMembers(); - PRBool result=ContainsGroup(theBlockGroup,mGroup); - return result; -} - - //this tells us whether this tag is an inline tag within the given parent - //NOTE: aParentID is currently ignored, but shouldn't be. -PRBool CElement::IsInlineElement(eHTMLTags aParentID) { - CGroupMembers& theInlineGroup=CInlineElement::GetContainedGroups(); - PRBool result=ContainsGroup(theInlineGroup,mGroup); - return result; -} - - -#endif diff --git a/htmlparser/src/CParserContext.cpp b/htmlparser/src/CParserContext.cpp deleted file mode 100644 index dadad15e4364..000000000000 --- a/htmlparser/src/CParserContext.cpp +++ /dev/null @@ -1,190 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - - -#include "nsIAtom.h" -#include "CParserContext.h" -#include "nsToken.h" -#include "prenv.h" -#include "nsHTMLTokenizer.h" -#include "nsExpatDriver.h" - -MOZ_DECL_CTOR_COUNTER(CParserContext) - -/** - * Your friendly little constructor. Ok, it's not the friendly, but the only guy - * using it is the parser. 
- * @update gess7/23/98 - * @param aScanner - * @param aKey - * @param aListener - */ -CParserContext::CParserContext(nsScanner* aScanner, - void *aKey, - eParserCommands aCommand, - nsIRequestObserver* aListener, - nsIDTD *aDTD, - eAutoDetectResult aStatus, - PRBool aCopyUnused) -{ - MOZ_COUNT_CTOR(CParserContext); - - mScanner=aScanner; - mKey=aKey; - mPrevContext=0; - mListener=aListener; - NS_IF_ADDREF(mListener); - mDTDMode=eDTDMode_unknown; - mAutoDetectStatus=aStatus; - mTransferBuffer=0; - mDTD=aDTD; - NS_IF_ADDREF(mDTD); - mTokenizer = 0; - mTransferBufferSize=eTransferBufferSize; - mStreamListenerState=eNone; - mMultipart=PR_TRUE; - mContextType=eCTNone; - mCopyUnused=aCopyUnused; - mParserCommand=aCommand; - mRequest=0; -} - -/** - * Your friendly little constructor. Ok, it's not the friendly, but the only guy - * using it is the parser. - * @update gess7/23/98 - * @param aScanner - * @param aKey - * @param aListener - */ -CParserContext::CParserContext(const CParserContext &aContext) : mMimeType() { - MOZ_COUNT_CTOR(CParserContext); - - mScanner=aContext.mScanner; - mKey=aContext.mKey; - mPrevContext=0; - mListener=aContext.mListener; - NS_IF_ADDREF(mListener); - - mDTDMode=aContext.mDTDMode; - mAutoDetectStatus=aContext.mAutoDetectStatus; - mTransferBuffer=aContext.mTransferBuffer; - mDTD=aContext.mDTD; - NS_IF_ADDREF(mDTD); - - mTokenizer = aContext.mTokenizer; - NS_IF_ADDREF(mTokenizer); - - mTransferBufferSize=eTransferBufferSize; - mStreamListenerState=aContext.mStreamListenerState; - mMultipart=aContext.mMultipart; - mContextType=aContext.mContextType; - mRequest=aContext.mRequest; - mParserCommand=aContext.mParserCommand; - SetMimeType(aContext.mMimeType); -} - - -/** - * Destructor for parser context - * NOTE: DO NOT destroy the dtd here. 
- * @update gess7/11/98 - */ -CParserContext::~CParserContext(){ - - MOZ_COUNT_DTOR(CParserContext); - - if(mScanner) { - delete mScanner; - mScanner=nsnull; - } - - if(mTransferBuffer) - delete [] mTransferBuffer; - - NS_IF_RELEASE(mDTD); - NS_IF_RELEASE(mListener); - NS_IF_RELEASE(mTokenizer); - - //Remember that it's ok to simply ingore the PrevContext. - -} - - -/** - * Set's the mimetype for this context - * @update rickg 03.18.2000 - */ -void CParserContext::SetMimeType(const nsACString& aMimeType){ - mMimeType.Assign(aMimeType); - - mDocType=ePlainText; - - if(mMimeType.Equals(NS_LITERAL_CSTRING(kHTMLTextContentType))) - mDocType=eHTML_Strict; - else if (mMimeType.Equals(NS_LITERAL_CSTRING(kXMLTextContentType)) || - mMimeType.Equals(NS_LITERAL_CSTRING(kXMLApplicationContentType)) || - mMimeType.Equals(NS_LITERAL_CSTRING(kXHTMLApplicationContentType)) || - mMimeType.Equals(NS_LITERAL_CSTRING(kXULTextContentType)) || -#ifdef MOZ_SVG - mMimeType.Equals(NS_LITERAL_CSTRING(kSVGTextContentType)) || -#endif - mMimeType.Equals(NS_LITERAL_CSTRING(kRDFTextContentType))) - mDocType=eXML; -} - -nsresult -CParserContext::GetTokenizer(PRInt32 aType, nsITokenizer*& aTokenizer) { - nsresult result = NS_OK; - - if(!mTokenizer) { - if (aType == NS_IPARSER_FLAG_HTML || mParserCommand == eViewSource) { - result = NS_NewHTMLTokenizer(&mTokenizer,mDTDMode,mDocType,mParserCommand); - // Propagate tokenizer state so that information is preserved - // between document.write. 
This fixes bug 99467 - if (mTokenizer && mPrevContext) - mTokenizer->CopyState(mPrevContext->mTokenizer); - } - else if (aType == NS_IPARSER_FLAG_XML) - { - result = CallQueryInterface(mDTD, &mTokenizer); - } - } - - aTokenizer = mTokenizer; - return result; -} diff --git a/htmlparser/src/CParserContext.h b/htmlparser/src/CParserContext.h deleted file mode 100644 index 695a8603d452..000000000000 --- a/htmlparser/src/CParserContext.h +++ /dev/null @@ -1,110 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -/** - * MODULE NOTES: - * @update gess 4/1/98 - * - */ - -#ifndef __CParserContext -#define __CParserContext - -#include "nsIParser.h" -#include "nsIURL.h" -#include "nsIDTD.h" -#include "nsIStreamListener.h" -#include "nsIRequest.h" -#include "nsScanner.h" -#include "nsString.h" -#include "nsCOMPtr.h" - -/** - * Note that the parser is given FULL access to all - * data in a parsercontext. Hey, that what it's for! - */ - -class CParserContext { - -public: - - enum {eTransferBufferSize=4096}; - enum eContextType {eCTNone,eCTURL,eCTString,eCTStream}; - - CParserContext( nsScanner* aScanner, - void* aKey=0, - eParserCommands aCommand=eViewNormal, - nsIRequestObserver* aListener=0, - nsIDTD *aDTD=0, - eAutoDetectResult aStatus=eUnknownDetect, - PRBool aCopyUnused=PR_FALSE); - - CParserContext( const CParserContext& aContext); - ~CParserContext(); - - nsresult GetTokenizer(PRInt32 aType, nsITokenizer*& aTokenizer); - void SetMimeType(const nsACString& aMimeType); - - nsCOMPtr<nsIRequest> mRequest; // provided by necko to differnciate different input streams - // why is mRequest strongly referenced? see bug 102376. 
- nsIDTD* mDTD; - nsIRequestObserver* mListener; - char* mTransferBuffer; - void* mKey; - nsITokenizer* mTokenizer; - CParserContext* mPrevContext; - nsScanner* mScanner; - - nsCString mMimeType; - nsDTDMode mDTDMode; - - eParserDocType mDocType; - eStreamState mStreamListenerState; //this is really only here for debug purposes. - eContextType mContextType; - eAutoDetectResult mAutoDetectStatus; - eParserCommands mParserCommand; //tells us to viewcontent/viewsource/viewerrors... - - PRPackedBool mMultipart; - PRPackedBool mCopyUnused; - PRUint32 mTransferBufferSize; -}; - - - -#endif - - diff --git a/htmlparser/src/jar.mn b/htmlparser/src/jar.mn deleted file mode 100644 index eff9719dfa6d..000000000000 --- a/htmlparser/src/jar.mn +++ /dev/null @@ -1,2 +0,0 @@ -en-US.jar: - locale/en-US/communicator/layout/xmlparser.properties diff --git a/htmlparser/src/nsDTDUtils.cpp b/htmlparser/src/nsDTDUtils.cpp deleted file mode 100644 index 92bb8a765318..000000000000 --- a/htmlparser/src/nsDTDUtils.cpp +++ /dev/null @@ -1,1663 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * Pierre Phaneuf <pp@ludusdesign.com> - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - - -#include "nsIAtom.h" -#include "nsDTDUtils.h" -#include "CNavDTD.h" -#include "nsIParserNode.h" -#include "nsParserNode.h" -#include "nsIChannel.h" -#include "nsIServiceManager.h" -#include "nsUnicharUtils.h" - -MOZ_DECL_CTOR_COUNTER(nsEntryStack) -MOZ_DECL_CTOR_COUNTER(nsDTDContext) -MOZ_DECL_CTOR_COUNTER(nsTokenAllocator) -MOZ_DECL_CTOR_COUNTER(CNodeRecycler) - -/************************************************************************************** - A few notes about how residual style handling is performed: - - 1. The style stack contains nsTagEntry elements. - 2. Every tag on the containment stack can have it's own residual style stack. - 3. When a style leaks, it's mParent member is set to the level on the stack where - it originated. A node with an mParent of 0 is not opened on tag stack, - but is open on stylestack. - 4. An easy way to tell that a container on the element stack is a residual style tag - is that it's use count is >1. 
- - **************************************************************************************/ - - -/** - * Default constructor - * @update harishd 04/04/99 - * @update gess 04/22/99 - */ -nsEntryStack::nsEntryStack() { - - MOZ_COUNT_CTOR(nsEntryStack); - - mCapacity=0; - mCount=0; - mEntries=0; -} - -/** - * Default destructor - * @update harishd 04/04/99 - * @update gess 04/22/99 - */ -nsEntryStack::~nsEntryStack() { - - MOZ_COUNT_DTOR(nsEntryStack); - - if(mEntries) { - //add code here to recycle the node if you have one... - delete [] mEntries; - mEntries=0; - } - - mCount=mCapacity=0; -} - -/** - * Release all objects in the entry stack - */ -void -nsEntryStack::ReleaseAll(nsNodeAllocator* aNodeAllocator) -{ - NS_WARN_IF_FALSE(aNodeAllocator,"no allocator? - potential leak!"); - - if(aNodeAllocator) { - NS_WARN_IF_FALSE(mCount >= 0,"count should not be negative"); - while(mCount > 0) { - nsCParserNode* node=this->Pop(); - IF_FREE(node,aNodeAllocator); - } - } -} - -/** - * Resets state of stack to be empty. - * @update harishd 04/04/99 - */ -void nsEntryStack::Empty(void) { - mCount=0; -} - - -/** - * - * @update gess 04/22/99 - */ -void nsEntryStack::EnsureCapacityFor(PRInt32 aNewMax,PRInt32 aShiftOffset) { - if(mCapacity<aNewMax){ - - const int kDelta=16; - - PRInt32 theSize = kDelta * ((aNewMax / kDelta) + 1); - nsTagEntry* temp=new nsTagEntry[theSize]; - mCapacity=theSize; - - if(temp){ - PRInt32 index=0; - for(index=0;index<mCount;++index) { - temp[aShiftOffset+index]=mEntries[index]; - } - if(mEntries) delete [] mEntries; - mEntries=temp; - } - else{ - //XXX HACK! This is very bad! We failed to get memory. 
- } - } //if -} - -/** - * - * @update gess 04/22/99 - */ -void nsEntryStack::Push(nsCParserNode* aNode, - nsEntryStack* aStyleStack, - PRBool aRefCntNode) -{ - if(aNode) { - EnsureCapacityFor(mCount+1); - mEntries[mCount].mTag = (eHTMLTags)aNode->GetNodeType(); - if (aRefCntNode) { - aNode->mUseCount++; - mEntries[mCount].mNode = NS_CONST_CAST(nsCParserNode*,aNode); - IF_HOLD(mEntries[mCount].mNode); - } - mEntries[mCount].mParent=aStyleStack; - mEntries[mCount++].mStyles=0; - } -} - - -/** - * This method inserts the given node onto the front of this stack - * - * @update gess 11/10/99 - */ -void nsEntryStack::PushFront(nsCParserNode* aNode, - nsEntryStack* aStyleStack, - PRBool aRefCntNode) -{ - if(aNode) { - if(mCount<mCapacity) { - PRInt32 index=0; - for(index=mCount;index>0;index--) { - mEntries[index]=mEntries[index-1]; - } - } - else { - EnsureCapacityFor(mCount+1,1); - } - mEntries[0].mTag = (eHTMLTags)aNode->GetNodeType(); - if (aRefCntNode) { - aNode->mUseCount++; - mEntries[0].mNode = NS_CONST_CAST(nsCParserNode*,aNode); - IF_HOLD(mEntries[0].mNode); - } - mEntries[0].mParent=aStyleStack; - mEntries[0].mStyles=0; - ++mCount; - } -} - -/** - * - * @update gess 11/10/99 - */ -void nsEntryStack::Append(nsEntryStack *aStack) { - if(aStack) { - - PRInt32 theCount=aStack->mCount; - - EnsureCapacityFor(mCount+aStack->mCount,0); - - PRInt32 theIndex=0; - for(theIndex=0;theIndex<theCount;++theIndex){ - mEntries[mCount]=aStack->mEntries[theIndex]; - mEntries[mCount++].mParent=0; - } - } -} - -/** - * This method removes the node for the given tag - * from anywhere within this entry stack, and shifts - * other entries down. - * - * NOTE: It's odd to be removing an element from the middle - * of a stack, but it's necessary because of how MALFORMED - * html can be. 
- * - * anIndex: the index within the stack of the tag to be removed - * aTag: the id of the tag to be removed - * @update gess 02/25/00 - */ -nsCParserNode* nsEntryStack::Remove(PRInt32 anIndex, - eHTMLTags aTag) -{ - nsCParserNode* result = 0; - if (0 < mCount && anIndex < mCount){ - result = mEntries[anIndex].mNode; - if (result) - result->mUseCount--; - PRInt32 theIndex = 0; - mCount -= 1; - for( theIndex = anIndex; theIndex < mCount; ++theIndex){ - mEntries[theIndex] = mEntries[theIndex+1]; - } - mEntries[mCount].mNode = 0; - mEntries[mCount].mStyles = 0; - nsEntryStack* theStyleStack = mEntries[anIndex].mParent; - if (theStyleStack) { - //now we have to tell the residual style stack where this tag - //originated that it's no longer in use. - PRUint32 scount = theStyleStack->mCount; - PRUint32 sindex = 0; - nsTagEntry *theStyleEntry=theStyleStack->mEntries; - for (sindex=scount-1;sindex>0;--sindex){ - if (theStyleEntry->mTag==aTag) { - theStyleEntry->mParent=0; //this tells us that the style is not open at any level - break; - } - ++theStyleEntry; - } //for - } - } - return result; -} - -/** - * - * @update harishd 04/04/99 - * @update gess 04/21/99 - */ -nsCParserNode* nsEntryStack::Pop(void) -{ - nsCParserNode* result = 0; - if (0 < mCount) { - result = mEntries[--mCount].mNode; - if (result) - result->mUseCount--; - mEntries[mCount].mNode = 0; - mEntries[mCount].mStyles = 0; - nsEntryStack* theStyleStack=mEntries[mCount].mParent; - if (theStyleStack) { - //now we have to tell the residual style stack where this tag - //originated that it's no longer in use. 
- PRUint32 scount = theStyleStack->mCount; - PRUint32 sindex = 0; - nsTagEntry *theStyleEntry=theStyleStack->mEntries; - for (sindex=scount-1;sindex>0;--sindex){ - if (theStyleEntry->mTag==mEntries[mCount].mTag) { - theStyleEntry->mParent=0; //this tells us that the style is not open at any level - break; - } - ++theStyleEntry; - } //for - } - } - return result; -} - -/** - * - * @update harishd 04/04/99 - * @update gess 04/21/99 - */ -eHTMLTags nsEntryStack::First() const -{ - eHTMLTags result=eHTMLTag_unknown; - if(0<mCount){ - result=mEntries[0].mTag; - } - return result; -} - -/** - * - * @update harishd 04/04/99 - * @update gess 04/21/99 - */ -nsCParserNode* nsEntryStack::NodeAt(PRInt32 anIndex) const -{ - nsCParserNode* result=0; - if((0<mCount) && (anIndex<mCount)) { - result=mEntries[anIndex].mNode; - } - return result; -} - -/** - * - * @update harishd 04/04/99 - * @update gess 04/21/99 - */ -eHTMLTags nsEntryStack::TagAt(PRInt32 anIndex) const -{ - eHTMLTags result=eHTMLTag_unknown; - if((0<mCount) && (anIndex<mCount)) { - result=mEntries[anIndex].mTag; - } - return result; -} - -/** - * - * @update gess 04/21/99 - */ -nsTagEntry* nsEntryStack::EntryAt(PRInt32 anIndex) const -{ - nsTagEntry *result=0; - if((0<mCount) && (anIndex<mCount)) { - result=&mEntries[anIndex]; - } - return result; -} - - -/** - * - * @update harishd 04/04/99 - * @update gess 04/21/99 - */ -eHTMLTags nsEntryStack::operator[](PRInt32 anIndex) const -{ - eHTMLTags result=eHTMLTag_unknown; - if((0<mCount) && (anIndex<mCount)) { - result=mEntries[anIndex].mTag; - } - return result; -} - - -/** - * - * @update harishd 04/04/99 - * @update gess 04/21/99 - */ -eHTMLTags nsEntryStack::Last(void) const -{ - eHTMLTags result=eHTMLTag_unknown; - if(0<mCount) { - result=mEntries[mCount-1].mTag; - } - return result; -} - -nsTagEntry* -nsEntryStack::PopEntry() -{ - nsTagEntry* entry = EntryAt(mCount-1); - this->Pop(); - return entry; -} - -void nsEntryStack::PushEntry(nsTagEntry* aEntry, - 
PRBool aRefCntNode) -{ - if (aEntry) { - EnsureCapacityFor(mCount+1); - mEntries[mCount].mNode = aEntry->mNode; - mEntries[mCount].mTag = aEntry->mTag; - mEntries[mCount].mParent = aEntry->mParent; - mEntries[mCount].mStyles = aEntry->mStyles; - if (aRefCntNode && mEntries[mCount].mNode) { - mEntries[mCount].mNode->mUseCount++; - IF_HOLD(mEntries[mCount].mNode); - } - mCount++; - } -} - -/*************************************************************** - Now define the dtdcontext class - ***************************************************************/ - - -/** - * - * @update gess 04.21.2000 - */ -nsDTDContext::nsDTDContext() : mStack(), mEntities(0){ - - MOZ_COUNT_CTOR(nsDTDContext); - mResidualStyleCount=0; - mContextTopIndex=-1; - mTableStates=0; - mTokenAllocator=0; - mNodeAllocator=0; - mAllBits=0; - -#ifdef DEBUG - memset(mXTags,0,sizeof(mXTags)); - mCounters = 0; -#endif -} - -#ifdef DEBUG - class CEntityDeallocator: public nsDequeFunctor{ - public: - virtual void* operator()(void* anObject) { - CNamedEntity *theEntity=(CNamedEntity*)anObject; - delete theEntity; - return 0; - } - }; -#endif - -/** - * - * @update gess9/10/98 - */ -nsDTDContext::~nsDTDContext() { - MOZ_COUNT_DTOR(nsDTDContext); - - while(mTableStates) { - //pop the current state and restore it's predecessor, if any... 
- CTableState *theState=mTableStates; - mTableStates=theState->mPrevious; - delete theState; - } - -#ifdef DEBUG - CEntityDeallocator theDeallocator; - mEntities.ForEach(theDeallocator); - if(mCounters) { - delete [] mCounters; - mCounters = 0; - } -#endif - -} - -#ifdef DEBUG - -CNamedEntity* nsDTDContext::GetEntity(const nsAString& aName)const { - if (aName.Length() > 2) { - nsAString::const_iterator start, end; - aName.BeginReading(start); - aName.EndReading(end); - - PRUnichar theChar=aName.Last(); - - // skip past leading and trailing quotes/etc - if(kQuote==*start) { - ++start; - } - - if(kSemicolon==theChar || - kQuote == theChar) { - --end; - } - - - const nsAString& entityName = Substring(start, end); - - PRInt32 theCount=mEntities.GetSize(); - PRInt32 theIndex=0; - for(theIndex=0;theIndex<theCount;++theIndex) { - CNamedEntity *theResult=(CNamedEntity*)mEntities.ObjectAt(theIndex); - if(theResult && - theResult->mName.Equals(entityName, - nsCaseInsensitiveStringComparator())) { - return theResult; - } - } - } - return 0; -} - -CNamedEntity* nsDTDContext::RegisterEntity(const nsAString& aName,const nsAString& aValue) { - CNamedEntity *theEntity=GetEntity(aName); - if(!GetEntity(aName)){ - theEntity=new CNamedEntity(aName,aValue); - mEntities.Push(theEntity); - } - return theEntity; -} - -/**************************************************************** - The abacus class is useful today for debug purposes, but it - will eventually serve as the implementation for css counters. - - This implementation is fine for static documents, but woefully - inadequate for dynamic documents. (This about what happens if - someone inserts a new counter using the DOM? -- The other - numbers in that "group" should be renumbered.) 
- - In order to be dynamic, we need a counter "group" manager that - is aware of layout (geometry at least) -- and that has a - mechanism for notifying markers that need to be updated, along - with the ability to cause incremental reflow to occur in a - localized context (so the counters display correctly). - - ****************************************************************/ - -class CAbacus { -public: - - enum eNumFormat {eUnknown,eAlpha,eDecimal,eRoman,eSpelled,eHex,eBinary,eFootnote,eUserSeries}; - - CAbacus(PRInt32 aDefaultValue=0,eNumFormat aFormat=eDecimal) { - mUserSeries=0; - mFormat=aFormat; - mCase=PR_FALSE; - mValue=0; - mUserBase=0; - } - - ~CAbacus() { - } - - void SetValue(int aStartValue) {mValue=aStartValue;} - void SetNumberingStyle(eNumFormat aFormat) {mFormat=aFormat;} - void SetUserSeries(const char* aSeries,int aUserBase) {mUserSeries=aSeries; mUserBase=aUserBase;} - void SetCase(PRBool alwaysUpper) {mCase=alwaysUpper;} - - void GetNextValueAsString(nsString& aString) { - GetFormattedString(mFormat,mValue++,aString,mUserSeries,0,mUserBase); - } - - void GetValueAsString(nsString& aString) { - GetFormattedString(mFormat,mValue,aString,mUserSeries,0,mUserBase); - } - - - /** - * Get a counter string in the given style for the given value. 
- * - * @update rickg 6June2000 - * - * @param aFormat -- format of choice - * @param aValue -- cardinal value of string - * @param aString -- will hold result - */ - static void GetFormattedString(eNumFormat aFormat,PRInt32 aValue, nsString& aString,const char* aCharSet, int anOffset, int aBase) { - switch (aFormat) { - case eDecimal: DecimalString(aValue,aString); break; - case eHex: HexString(aValue,aString); break; - case eBinary: BinaryString(aValue,aString); break; - case eAlpha: AlphaString(aValue,aString); break; - case eSpelled: SpelledString(aValue,aString); break; - case eRoman: RomanString(aValue,aString); break; - case eFootnote: FootnoteString(aValue,aString); break; - case eUserSeries: SeriesString(aValue,aString,aCharSet,anOffset,aBase); break; - default: - DecimalString(aValue,aString); break; - } - } - - /** - * Compute a counter string in the casted-series style for the given value. - * - * @update rickg 6June2000 - * - * @param aValue -- cardinal value of string - * @param aString -- will hold result - */ - static void SeriesString(PRInt32 aValue,nsString& aString,const char* aCharSet, int offset, int base) { - int ndex=0; - int root=1; - int next=base; - int expn=1; - - aString.Truncate(); - if(aValue<0) - aString.Append(PRUnichar('-')); - - aValue=abs(aValue); // must be positive here... - while(next<=aValue) { // scale up in baseN; exceed current value. - root=next; - next*=base; - ++expn; - } - - while(expn--) { - ndex = ((root<=aValue) && (root)) ? (aValue/root): 0; - aValue%=root; - aString.Append(PRUnichar(aCharSet[ndex+((root>1)*offset)])); - root/=base; - } - } - - /** - * Compute a counter string in the spelled style for the given value. 
- * - * @update rickg 6June2000 - * - * @param aValue -- cardinal value of string - * @param aString -- will hold result - */ - static void SpelledString(PRInt32 aValue,nsString& aString) { - - static char ones[][12]= {"zero","one ","two ","three ","four ","five ","six ","seven ","eight ","nine ","ten "}; - static char teens[][12]= {"ten ","eleven ","twelve ","thirteen ","fourteen ","fifteen ","sixteen ","seventeen ","eighteen ","nineteen "}; - static char tens[][12]= {"","ten ","twenty ","thirty ","fourty ","fifty ","sixty ","seventy ","eighty ","ninety ","hundred "}; - static char bases[][20]= {"","hundred ","thousand ","million ","billion ","trillion ","quadrillion ","quintillion ","bajillion "}; - - aString.Truncate(); - if(aValue<0) - aString.Append(PRUnichar('-')); - - PRInt32 root=1000000000; - PRInt32 expn=4; - PRInt32 modu=0; - - aValue=abs(aValue); - if(0<aValue) { - - while(root && aValue) { - PRInt32 temp=aValue/root; - if(temp) { - PRInt32 theDiv=temp/100; - if (theDiv) {//start with hundreds part - aString.AppendWithConversion(ones[theDiv]); - aString.AppendWithConversion(bases[1]); - } - modu=(temp%10); - theDiv=(temp%100)/10; - if (theDiv) { - if (theDiv<2) { - aString.AppendWithConversion(teens[modu]); - modu=0; - } - else aString.AppendWithConversion(tens[theDiv]); - } - if (modu) - aString.AppendWithConversion(ones[modu]); //do remainder - aValue-=(temp*root); - if (expn>1) - aString.AppendWithConversion(bases[expn]); - } - expn--; - root/=1000; - } - } - else aString.AppendWithConversion(ones[0]); - } - - /** - * Compute a counter string in the decimal format for the given value. - * - * @update rickg 6June2000 - * - * @param aValue -- cardinal value of string - * @param aString -- will hold result - */ - static void DecimalString(PRInt32 aValue,nsString& aString) { - aString.Truncate(); - aString.AppendInt(aValue); - } - - /** - * Compute a counter string in binary format for the given value. 
- * - * @update rickg 6June2000 - * - * @param aValue -- cardinal value of string - * @param aString -- will hold result - */ - static void BinaryString(PRInt32 aValue,nsString& aString) { - static char kBinarySet[]="01"; - - if (aValue<0) - aValue=65536-abs(aValue); - SeriesString(aValue,aString,kBinarySet,0,2); - } - - /** - * Compute a counter string in hex format for the given value. - * - * @update rickg 6June2000 - * - * @param aValue -- cardinal value of string - * @param aString -- will hold result - */ - static void HexString(PRInt32 aValue,nsString& aString) { - static char kHexSet[]="0123456789ABCDEF"; - - if (aValue<0) - aValue=65536-abs(aValue); - SeriesString(aValue,aString,kHexSet,0,16); - } - - /** - * Compute a counter string in the roman style for the given value. - * - * @update rickg 6June2000 - * - * @param aValue -- cardinal value of string - * @param aString -- will hold result - */ - static void RomanString(PRInt32 aValue,nsString& aString) { - static PRUnichar digitsA[] = { PRUnichar('i'), - PRUnichar('x'), - PRUnichar('c'), - PRUnichar('m') }; - static PRUnichar digitsB[] = { PRUnichar('v'), - PRUnichar('l'), - PRUnichar('d'), - PRUnichar('?') }; - - aString.Truncate(); - if(aValue<0) - aString.Append(PRUnichar('-')); - - aValue=abs(aValue); - char decStr[20]; - sprintf(decStr,"%d", aValue); - - int len=strlen(decStr); - int romanPos=len; - int digitPos=0; - int n=0; - - for(digitPos=0;digitPos<len;++digitPos) { - romanPos--; - switch(decStr[digitPos]) { - case '0': break; - case '3': aString.Append(digitsA[romanPos]); - case '2': aString.Append(digitsA[romanPos]); - case '1': aString.Append(digitsA[romanPos]); - break; - case '4': aString.Append(digitsA[romanPos]); - case '5': case '6': - case '7': case '8': - aString.Append(digitsB[romanPos]); - for(n=0;n<(decStr[digitPos]-'5');++n) - aString.Append(digitsA[romanPos]); - break; - case '9': - aString.Append(digitsA[romanPos]); - aString.Append(digitsA[romanPos]); - break; - } - } - } - - 
/** - * Compute a counter string in the alpha style for the given value. - * - * @update rickg 6June2000 - * - * @param aValue -- cardinal value of string - * @param aString -- will hold result - */ - static void AlphaString(PRInt32 aValue,nsString& aString) { - static const char kAlphaSet[]="abcdefghijklmnopqrstuvwxyz"; - - if (0<aValue) - SeriesString(aValue-1,aString,kAlphaSet,-1,26); - } - - /** - * Compute a counter string in the footnote style for the given value. - * - * @update rickg 6June2000 - * - * @param aValue -- cardinal value of string - * @param aString -- will hold result - */ - static void FootnoteString(PRInt32 aValue,nsString& aString) { - static char kFootnoteSet[]="abcdefg"; - - int seriesLen = strlen (kFootnoteSet) - 1; - int count=0; - int repCount=0; - int modChar=0; - - aString.Truncate(); - - aValue=abs(aValue); - repCount=((aValue-1)/seriesLen); - modChar=aValue-(repCount*seriesLen); - - for(count=0;count<=repCount;++count) { - aString.Append(PRUnichar(kFootnoteSet[modChar])); - } - } - -protected: - - const char* mUserSeries; - eNumFormat mFormat; - PRBool mCase; - PRInt32 mValue; - int mUserBase; -}; - - -/** - * - * @update gess 11May2000 - */ -void nsDTDContext::AllocateCounters(void) { - if(!mCounters) { - mCounters = new PRInt32 [NS_HTML_TAG_MAX]; //in addition to reseting, you may need to allocate. - ResetCounters(); - } -} - -/** - * - * @update gess 11May2000 - */ -void nsDTDContext::ResetCounters(void) { - if(mCounters) { - memset(mCounters,0,NS_HTML_TAG_MAX*sizeof(PRInt32)); - } -} - -/********************************************************** - @update: rickg 17May2000 - - Call this to handle counter attributes: - name="group" - value="nnn" - noincr="?" - format="alpha|dec|footnote|hex|roman|spelled|talk" - - returns the newly incremented value for the (determined) group. 
- **********************************************************/ -PRInt32 nsDTDContext::IncrementCounter(eHTMLTags aTag,nsIParserNode& aNode,nsString& aResult) { - - PRInt32 result=0; - - PRInt32 theIndex=0; - PRInt32 theNewValue=-1; //-1 is interpreted to mean "don't reset the counter sequence. - PRInt32 theIncrValue=1; //this may get set to 0 if we see a "noincr" key. - PRInt32 theCount=aNode.GetAttributeCount(); - CNamedEntity *theEntity=0; - - CAbacus::eNumFormat theNumFormat=CAbacus::eDecimal; - - for(theIndex=0;theIndex<theCount;++theIndex){ - const nsAString& theKey=aNode.GetKeyAt(theIndex); - const nsAString& theValue=aNode.GetValueAt(theIndex); - - if(theKey.Equals(NS_LITERAL_STRING("name"), nsCaseInsensitiveStringComparator())){ - theEntity=GetEntity(theValue); - if(!theEntity) { - theEntity=RegisterEntity(theValue,theValue); - theEntity->mOrdinal=0; - } - aTag=eHTMLTag_userdefined; - } - else if(theKey.Equals(NS_LITERAL_STRING("noincr"), nsCaseInsensitiveStringComparator())){ - theIncrValue=0; - } - else if(theKey.Equals(NS_LITERAL_STRING("format"), nsCaseInsensitiveStringComparator())){ - nsAString::const_iterator start; - - PRUnichar theChar=*theValue.BeginReading(start); - if('"'==theChar) - theChar=*(++start); - switch(theChar){ - case 'A': case 'a': theNumFormat=CAbacus::eAlpha; break; - case 'B': case 'b': theNumFormat=CAbacus::eBinary; break; - case 'D': case 'd': theNumFormat=CAbacus::eDecimal; break; - case 'H': case 'h': theNumFormat=CAbacus::eHex; break; - case 'R': case 'r': theNumFormat=CAbacus::eRoman; break; - case 'S': case 's': theNumFormat=CAbacus::eSpelled; break; - default: - theNumFormat=CAbacus::eDecimal; - break; - } - //determine numbering style - } - else if(theKey.Equals(NS_LITERAL_STRING("value"), nsCaseInsensitiveStringComparator())){ - PRInt32 err=0; - theNewValue=atoi(NS_LossyConvertUCS2toASCII(theValue).get()); - if(!err) { - - theIncrValue=0; - - AllocateCounters(); - if(mCounters) { - mCounters[aTag]=theNewValue; - } - } - 
else theNewValue=-1; - } - } - - if(theEntity && (eHTMLTag_userdefined==aTag)) { - result=theEntity->mOrdinal+=theIncrValue; - } - else { - AllocateCounters(); - if(mCounters) { - result=mCounters[aTag]+=theIncrValue; - } - else result=0; - } - CAbacus::GetFormattedString(theNumFormat,result,aResult,0,0,0); - - return result; -} - -#endif - -/** - * - * @update gess7/9/98 - */ -PRBool nsDTDContext::HasOpenContainer(eHTMLTags aTag) const { - PRInt32 theIndex=mStack.LastOf(aTag); - return PRBool(-1<theIndex); -} - -/** - * - * @update gess7/9/98 - */ -void nsDTDContext::Push(nsCParserNode* aNode, - nsEntryStack* aStyleStack, - PRBool aRefCntNode) { - if(aNode) { -#ifdef NS_DEBUG - eHTMLTags theTag = (eHTMLTags)aNode->GetNodeType(); - int size = mStack.mCount; - if (size < eMaxTags) - mXTags[size] = theTag; -#endif - mStack.Push(aNode, aStyleStack, aRefCntNode); - } -} - -nsTagEntry* -nsDTDContext::PopEntry() -{ - PRInt32 theSize = mStack.mCount; - if(0<theSize) { -#ifdef NS_DEBUG - if (theSize <= eMaxTags) - mXTags[theSize-1]=eHTMLTag_unknown; -#endif - return mStack.PopEntry(); - } - return 0; -} - -void nsDTDContext::PushEntry(nsTagEntry* aEntry, - PRBool aRefCntNode) -{ -#ifdef NS_DEBUG - int size=mStack.mCount; - if(size< eMaxTags && aEntry) - mXTags[size]=aEntry->mTag; -#endif - mStack.PushEntry(aEntry, aRefCntNode); -} - -/* This method will move the top entires, in the entry-stack, into dest context. - * @param aDest - Destination context for the entries. - * @param aCount - Number of entries, on top of the entry-stack, to be moved. 
- */ -void -nsDTDContext::MoveEntries(nsDTDContext& aDest, - PRInt32 aCount) -{ - NS_ASSERTION(aCount > 0 && mStack.mCount >= aCount, "cannot move entries"); - if (aCount > 0 && mStack.mCount >= aCount) { - while (aCount) { - aDest.PushEntry(&mStack.mEntries[--mStack.mCount], PR_FALSE); -#ifdef NS_DEBUG - mXTags[mStack.mCount] = eHTMLTag_unknown; -#endif - --aCount; - } - } -} - -/** - * @update gess 11/11/99, - * harishd 04/04/99 - */ -nsCParserNode* nsDTDContext::Pop(nsEntryStack *&aChildStyleStack) { - - PRInt32 theSize=mStack.mCount; - nsCParserNode* result=0; - - if(0<theSize) { - -#ifdef NS_DEBUG - if ((theSize>0) && (theSize <= eMaxTags)) - mXTags[theSize-1]=eHTMLTag_unknown; -#endif - - - nsTagEntry* theEntry=mStack.EntryAt(mStack.mCount-1); - if(theEntry) { - aChildStyleStack=theEntry->mStyles; - } - - result=mStack.Pop(); - theEntry->mParent=0; - } - - return result; -} - -/** - * - * @update harishd 04/07/00 - */ - -nsCParserNode* nsDTDContext::Pop() { - nsEntryStack *theTempStyleStack=0; // This has no use here... 
- return Pop(theTempStyleStack); -} - -/** - * - * @update gess7/9/98 - */ -eHTMLTags nsDTDContext::First(void) const { - return mStack.First(); -} - -/** - * - * @update gess7/9/98 - */ -eHTMLTags nsDTDContext::TagAt(PRInt32 anIndex) const { - return mStack.TagAt(anIndex); -} - -/** - * - * @update gess7/9/98 - */ -nsTagEntry* nsDTDContext::LastEntry(void) const { - return mStack.EntryAt(mStack.mCount-1); -} - -/** - * - * @update gess7/9/98 - */ -eHTMLTags nsDTDContext::Last() const { - return mStack.Last(); -} - - -/** - * - * @update gess7/9/98 - */ -nsEntryStack* nsDTDContext::GetStylesAt(PRInt32 anIndex) const { - nsEntryStack* result=0; - - if(anIndex<mStack.mCount){ - nsTagEntry* theEntry=mStack.EntryAt(anIndex); - if(theEntry) { - result=theEntry->mStyles; - } - } - return result; -} - - -/** - * - * @update gess 04/28/99 - */ -void nsDTDContext::PushStyle(nsCParserNode* aNode){ - - nsTagEntry* theEntry=mStack.EntryAt(mStack.mCount-1); - if(theEntry ) { - nsEntryStack* theStack=theEntry->mStyles; - if(!theStack) { - theStack=theEntry->mStyles=new nsEntryStack(); - } - if(theStack) { - theStack->Push(aNode); - ++mResidualStyleCount; - } - } //if -} - - -/** - * Call this when you have an EntryStack full of styles - * that you want to push at this level. - * - * @update gess 04/28/99 - */ -void nsDTDContext::PushStyles(nsEntryStack *aStyles){ - - if(aStyles) { - nsTagEntry* theEntry=mStack.EntryAt(mStack.mCount-1); - if(theEntry ) { - nsEntryStack* theStyles=theEntry->mStyles; - if(!theStyles) { - theEntry->mStyles=aStyles; - - PRUint32 scount=aStyles->mCount; - PRUint32 sindex=0; - - theEntry=aStyles->mEntries; - for(sindex=0;sindex<scount;++sindex){ - theEntry->mParent=0; //this tells us that the style is not open at any level - ++theEntry; - ++mResidualStyleCount; - } //for - - } - else { - theStyles->Append(aStyles); - // Delete aStyles since it has been copied to theStyles... 
- delete aStyles; - aStyles=0; - } - } //if(theEntry ) - else if(mStack.mCount==0) { - // If you're here it means that we have hit the rock bottom - // ,of the stack, and there's no need to handle anymore styles. - // Fix for bug 29048 - IF_DELETE(aStyles,mNodeAllocator); - } - }//if(aStyles) -} - - -/** - * - * @update gess 04/28/99 - */ -nsCParserNode* nsDTDContext::PopStyle(void){ - nsCParserNode *result=0; - - nsTagEntry *theEntry=mStack.EntryAt(mStack.mCount-1); - if(theEntry && (theEntry->mNode)) { - nsEntryStack* theStyleStack=theEntry->mParent; - if(theStyleStack){ - result=theStyleStack->Pop(); - mResidualStyleCount--; - } - } //if - return result; -} - -/** - * - * @update gess 04/28/99 - */ -nsCParserNode* nsDTDContext::PopStyle(eHTMLTags aTag){ - - PRInt32 theLevel=0; - nsCParserNode* result=0; - - for(theLevel=mStack.mCount-1;theLevel>0;theLevel--) { - nsEntryStack *theStack=mStack.mEntries[theLevel].mStyles; - if(theStack) { - if(aTag==theStack->Last()) { - result=theStack->Pop(); - mResidualStyleCount--; - break; // Fix bug 50710 - Stop after finding a style. - } else { - // NS_ERROR("bad residual style entry"); - } - } - } - - return result; -} - -/** - * - * This is similar to popstyle, except that it removes the - * style tag given from anywhere in the style stack, and - * not just at the top. 
- * - * @update gess 01/26/00 - */ -void nsDTDContext::RemoveStyle(eHTMLTags aTag){ - - PRInt32 theLevel=mStack.mCount; - - while (theLevel) { - nsEntryStack *theStack=GetStylesAt(--theLevel); - if (theStack) { - PRInt32 index=theStack->mCount; - while (index){ - nsTagEntry *theEntry=theStack->EntryAt(--index); - if (aTag==(eHTMLTags)theEntry->mNode->GetNodeType()) { - mResidualStyleCount--; - nsCParserNode* result=theStack->Remove(index,aTag); - IF_FREE(result, mNodeAllocator); - return; - } - } - } - } -} - -/** - * This gets called when the parser module is getting unloaded - * - * @return nada - */ -void nsDTDContext::ReleaseGlobalObjects(void){ -} - - -/************************************************************** - Now define the nsTokenAllocator class... - **************************************************************/ - -static const size_t kTokenBuckets[] ={sizeof(CStartToken),sizeof(CAttributeToken),sizeof(CCommentToken),sizeof(CEndToken)}; -static const PRInt32 kNumTokenBuckets = sizeof(kTokenBuckets) / sizeof(size_t); -static const PRInt32 kInitialTokenPoolSize = NS_SIZE_IN_HEAP(sizeof(CToken)) * 200; - -/** - * - * @update gess7/25/98 - * @param - */ -nsTokenAllocator::nsTokenAllocator() { - - MOZ_COUNT_CTOR(nsTokenAllocator); - - mArenaPool.Init("TokenPool", kTokenBuckets, kNumTokenBuckets, kInitialTokenPoolSize); - -#ifdef NS_DEBUG - int i=0; - for(i=0;i<eToken_last-1;++i) { - mTotals[i]=0; - } -#endif - -} - -/** - * Destructor for the token factory - * @update gess7/25/98 - */ -nsTokenAllocator::~nsTokenAllocator() { - - MOZ_COUNT_DTOR(nsTokenAllocator); - -} - -class CTokenFinder: public nsDequeFunctor{ -public: - CTokenFinder(CToken* aToken) {mToken=aToken;} - virtual void* operator()(void* anObject) { - if(anObject==mToken) { - return anObject; - } - return 0; - } - CToken* mToken; -}; - -/** - * Let's get this code ready to be reused by all the contexts. 
- * - * @update rickg 12June2000 - * @param aType -- tells you the type of token to create - * @param aTag -- tells you the type of tag to init with this token - * @param aString -- gives a default string value for the token - * - * @return ptr to new token (or 0). - */ -CToken* nsTokenAllocator::CreateTokenOfType(eHTMLTokenTypes aType,eHTMLTags aTag, const nsAString& aString) { - - CToken* result=0; - -#ifdef NS_DEBUG - mTotals[aType-1]++; -#endif - switch(aType){ - case eToken_start: result=new(mArenaPool) CStartToken(aString, aTag); break; - case eToken_end: result=new(mArenaPool) CEndToken(aString, aTag); break; - case eToken_comment: result=new(mArenaPool) CCommentToken(aString); break; - case eToken_entity: result=new(mArenaPool) CEntityToken(aString); break; - case eToken_whitespace: result=new(mArenaPool) CWhitespaceToken(aString); break; - case eToken_newline: result=new(mArenaPool) CNewlineToken(); break; - case eToken_text: result=new(mArenaPool) CTextToken(aString); break; - case eToken_attribute: result=new(mArenaPool) CAttributeToken(aString); break; - case eToken_script: result=new(mArenaPool) CScriptToken(aString); break; - case eToken_style: result=new(mArenaPool) CStyleToken(aString); break; - case eToken_instruction: result=new(mArenaPool) CInstructionToken(aString); break; - case eToken_cdatasection: result=new(mArenaPool) CCDATASectionToken(aString); break; - case eToken_error: result=new(mArenaPool) CErrorToken(); break; - case eToken_doctypeDecl: result=new(mArenaPool) CDoctypeDeclToken(aString); break; - case eToken_markupDecl: result=new(mArenaPool) CMarkupDeclToken(aString); break; - default: - NS_ASSERTION(PR_FALSE, "nsDTDUtils::CreateTokenOfType: illegal token type"); - break; - } - - return result; -} - -/** - * Let's get this code ready to be reused by all the contexts. 
- * - * @update rickg 12June2000 - * @param aType -- tells you the type of token to create - * @param aTag -- tells you the type of tag to init with this token - * - * @return ptr to new token (or 0). - */ -CToken* nsTokenAllocator::CreateTokenOfType(eHTMLTokenTypes aType,eHTMLTags aTag) { - - CToken* result=0; - -#ifdef NS_DEBUG - mTotals[aType-1]++; -#endif - switch(aType){ - case eToken_start: result=new(mArenaPool) CStartToken(aTag); break; - case eToken_end: result=new(mArenaPool) CEndToken(aTag); break; - case eToken_comment: result=new(mArenaPool) CCommentToken(); break; - case eToken_attribute: result=new(mArenaPool) CAttributeToken(); break; - case eToken_entity: result=new(mArenaPool) CEntityToken(); break; - case eToken_whitespace: result=new(mArenaPool) CWhitespaceToken(); break; - case eToken_newline: result=new(mArenaPool) CNewlineToken(); break; - case eToken_text: result=new(mArenaPool) CTextToken(); break; - case eToken_script: result=new(mArenaPool) CScriptToken(); break; - case eToken_style: result=new(mArenaPool) CStyleToken(); break; - case eToken_instruction: result=new(mArenaPool) CInstructionToken(); break; - case eToken_cdatasection: result=new(mArenaPool) CCDATASectionToken(aTag); break; - case eToken_error: result=new(mArenaPool) CErrorToken(); break; - case eToken_doctypeDecl: result=new(mArenaPool) CDoctypeDeclToken(aTag); break; - case eToken_markupDecl: result=new(mArenaPool) CMarkupDeclToken(); break; - default: - NS_ASSERTION(PR_FALSE, "nsDTDUtils::CreateTokenOfType: illegal token type"); - break; - } - - return result; -} - -#ifdef DEBUG_TRACK_NODES - -static nsCParserNode* gAllNodes[100]; -static int gAllNodeCount=0; - -int FindNode(nsCParserNode *aNode) { - int theIndex=0; - for(theIndex=0;theIndex<gAllNodeCount;++theIndex) { - if(gAllNodes[theIndex]==aNode) { - return theIndex; - } - } - return -1; -} - -void AddNode(nsCParserNode *aNode) { - if(-1==FindNode(aNode)) { - gAllNodes[gAllNodeCount++]=aNode; - } - else { - //you 
tried to recycle a node twice! - } -} - -void RemoveNode(nsCParserNode *aNode) { - int theIndex=FindNode(aNode); - if(-1<theIndex) { - gAllNodes[theIndex]=gAllNodes[--gAllNodeCount]; - } -} - -#endif - - -#ifdef HEAP_ALLOCATED_NODES -nsNodeAllocator::nsNodeAllocator():mSharedNodes(0){ -#ifdef DEBUG_TRACK_NODES - mCount=0; -#endif -#else - static const size_t kNodeBuckets[] = { sizeof(nsCParserNode), sizeof(nsCParserStartNode) }; - static const PRInt32 kNumNodeBuckets = sizeof(kNodeBuckets) / sizeof(size_t); - static const PRInt32 kInitialNodePoolSize = NS_SIZE_IN_HEAP(sizeof(nsCParserNode)) * 35; // optimal size based on space-trace data -nsNodeAllocator::nsNodeAllocator() { - mNodePool.Init("NodePool", kNodeBuckets, kNumNodeBuckets, kInitialNodePoolSize); -#endif - MOZ_COUNT_CTOR(nsNodeAllocator); -} - -nsNodeAllocator::~nsNodeAllocator() { - MOZ_COUNT_DTOR(nsNodeAllocator); - -#ifdef HEAP_ALLOCATED_NODES - nsCParserNode* theNode = 0; - - while((theNode=(nsCParserNode*)mSharedNodes.Pop())){ -#ifdef DEBUG_TRACK_NODES - RemoveNode(theNode); -#endif - ::operator delete(theNode); - theNode=nsnull; - } -#ifdef DEBUG_TRACK_NODES - if(mCount) { - printf("**************************\n"); - printf("%i out of %i nodes leaked!\n",gAllNodeCount,mCount); - printf("**************************\n"); - } -#endif -#endif -} - -nsCParserNode* nsNodeAllocator::CreateNode(CToken* aToken, - nsTokenAllocator* aTokenAllocator) -{ - nsCParserNode* result = 0; -#ifdef HEAP_ALLOCATED_NODES -#if 0 - if(gAllNodeCount!=mSharedNodes.GetSize()) { - int x=10; //this is very BAD! - } -#endif - result = NS_STATIC_CAST(nsCParserNode*,mSharedNodes.Pop()); - if (result) { - result->Init(aToken, aTokenAllocator,this); - } - else{ - result = nsCParserNode::Create(aToken, aTokenAllocator,this); -#ifdef DEBUG_TRACK_NODES - ++mCount; - AddNode(NS_STATIC_CAST(nsCParserNode*,result)); -#endif - IF_HOLD(result); - } -#else - eHTMLTokenTypes type = aToken ? 
eHTMLTokenTypes(aToken->GetTokenType()) : eToken_unknown; - switch (type) { - case eToken_start: - result = nsCParserStartNode::Create(aToken, aTokenAllocator,this); - break; - default : - result = nsCParserNode::Create(aToken, aTokenAllocator,this); - break; - } - IF_HOLD(result); -#endif - return result; -} - -#ifdef DEBUG -void DebugDumpContainmentRules(nsIDTD& theDTD,const char* aFilename,const char* aTitle) { -} -#endif - - -/************************************************************************* - * The table lookup technique was adapted from the algorithm described * - * by Avram Perez, Byte-wise CRC Calculations, IEEE Micro 3, 40 (1983). * - *************************************************************************/ - -#define POLYNOMIAL 0x04c11db7L - -static PRBool crc_table_initialized; -static PRUint32 crc_table[256]; - -static void gen_crc_table() { - /* generate the table of CRC remainders for all possible bytes */ - int i, j; - PRUint32 crc_accum; - for ( i = 0; i < 256; i++ ) { - crc_accum = ( (unsigned long) i << 24 ); - for ( j = 0; j < 8; j++ ) { - if ( crc_accum & 0x80000000L ) - crc_accum = ( crc_accum << 1 ) ^ POLYNOMIAL; - else crc_accum = ( crc_accum << 1 ); - } - crc_table[i] = crc_accum; - } - return; -} - -PRUint32 AccumulateCRC(PRUint32 crc_accum, char *data_blk_ptr, int data_blk_size) { - if (!crc_table_initialized) { - gen_crc_table(); - crc_table_initialized = PR_TRUE; - } - - /* update the CRC on the data block one byte at a time */ - int i, j; - for ( j = 0; j < data_blk_size; j++ ) { - i = ( (int) ( crc_accum >> 24) ^ *data_blk_ptr++ ) & 0xff; - crc_accum = ( crc_accum << 8 ) ^ crc_table[i]; - } - return crc_accum; -} - -/************************************************************** - This defines the topic object used by the observer service. - The observerService uses a list of these, 1 per topic when - registering tags. 
- **************************************************************/ -NS_IMPL_ISUPPORTS1(nsObserverEntry, nsIObserverEntry) - -nsObserverEntry::nsObserverEntry(const nsAString& aTopic) : mTopic(aTopic) -{ - memset(mObservers, 0, sizeof(mObservers)); -} - -nsObserverEntry::~nsObserverEntry() { - for (PRInt32 i = 0; i <= NS_HTML_TAG_MAX; ++i){ - if (mObservers[i]) { - PRInt32 count = mObservers[i]->Count(); - for (PRInt32 j = 0; j < count; ++j) { - nsISupports* obs = (nsISupports*)mObservers[i]->ElementAt(j); - NS_IF_RELEASE(obs); - } - delete mObservers[i]; - } - } -} - -NS_IMETHODIMP -nsObserverEntry::Notify(nsIParserNode* aNode, - nsIParser* aParser, - nsISupports* aWebShell, - const PRUint32 aFlags) -{ - NS_ENSURE_ARG_POINTER(aNode); - NS_ENSURE_ARG_POINTER(aParser); - - nsresult result = NS_OK; - eHTMLTags theTag = (eHTMLTags)aNode->GetNodeType(); - - if (theTag <= NS_HTML_TAG_MAX) { - nsVoidArray* theObservers = mObservers[theTag]; - if (theObservers) { - PRInt32 theCharsetSource; - nsCAutoString charset; - aParser->GetDocumentCharset(charset,theCharsetSource); - NS_ConvertASCIItoUCS2 theCharsetValue(charset); - - PRInt32 theAttrCount = aNode->GetAttributeCount(); - PRInt32 theObserversCount = theObservers->Count(); - if (0 < theObserversCount){ - nsStringArray keys(theAttrCount+4), values(theAttrCount+4); - - // XXX this and the following code may be a performance issue. - // Every key and value is copied and added to an voidarray (causing at - // least 2 allocations for mImpl, usually more, plus at least 1 per - // string (total = 2*(keys+3) + 2(or more) array allocations )). 
- PRInt32 index; - for (index = 0; index < theAttrCount; ++index) { - keys.AppendString(aNode->GetKeyAt(index)); - values.AppendString(aNode->GetValueAt(index)); - } - - nsAutoString intValue; - - keys.AppendString(NS_LITERAL_STRING("charset")); - values.AppendString(theCharsetValue); - - keys.AppendString(NS_LITERAL_STRING("charsetSource")); - intValue.AppendInt(PRInt32(theCharsetSource),10); - values.AppendString(intValue); - - keys.AppendString(NS_LITERAL_STRING("X_COMMAND")); - values.AppendString(NS_LITERAL_STRING("text/html")); - - nsCOMPtr<nsIChannel> channel; - aParser->GetChannel(getter_AddRefs(channel)); - - for (index=0;index<theObserversCount;++index) { - nsIElementObserver* observer = NS_STATIC_CAST(nsIElementObserver*,theObservers->ElementAt(index)); - if (observer) { - result = observer->Notify(aWebShell, channel, - nsHTMLTags::GetStringValue(theTag), - &keys, &values, aFlags); - if (NS_FAILED(result)) { - break; - } - } - } - } - } - } - return result; -} - -PRBool -nsObserverEntry::Matches(const nsAString& aString) { - PRBool result = aString.Equals(mTopic); - return result; -} - -nsresult -nsObserverEntry::AddObserver(nsIElementObserver *aObserver, - eHTMLTags aTag) -{ - if (aObserver) { - if (!mObservers[aTag]) { - mObservers[aTag] = new nsAutoVoidArray(); - if (!mObservers[aTag]) { - return NS_ERROR_OUT_OF_MEMORY; - } - } - NS_ADDREF(aObserver); - mObservers[aTag]->AppendElement(aObserver); - } - return NS_OK; -} - -void -nsObserverEntry::RemoveObserver(nsIElementObserver *aObserver) -{ - for (PRInt32 i=0; i <= NS_HTML_TAG_MAX; ++i){ - if (mObservers[i]) { - nsISupports* obs = aObserver; - PRBool removed = mObservers[i]->RemoveElement(obs); - if (removed) { - NS_RELEASE(obs); - } - } - } -} diff --git a/htmlparser/src/nsDTDUtils.h b/htmlparser/src/nsDTDUtils.h deleted file mode 100644 index 432cc358edfd..000000000000 --- a/htmlparser/src/nsDTDUtils.h +++ /dev/null @@ -1,656 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; 
c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - - -/** - * MODULE NOTES: - * @update gess 4/1/98 - * - */ - - - -#ifndef DTDUTILS_ -#define DTDUTILS_ - -#include "nsHTMLTags.h" -#include "nsHTMLTokens.h" -#include "nsIParser.h" -#include "nsCRT.h" -#include "nsDeque.h" -#include "nsIDTD.h" -#include "nsITokenizer.h" -#include "nsString.h" -#include "nsIParserNode.h" -#include "nsFixedSizeAllocator.h" -#include "nsVoidArray.h" -#include "nsIParserService.h" -#include "nsReadableUtils.h" - -#define IF_HOLD(_ptr) \ - PR_BEGIN_MACRO \ - if(_ptr) { \ - _ptr->AddRef(); \ - } \ - PR_END_MACRO - -// recycles _ptr -#define IF_FREE(_ptr, _allocator) \ - PR_BEGIN_MACRO \ - if(_ptr && _allocator) { \ - _ptr->Release((_allocator)->GetArenaPool()); \ - _ptr=0; \ - } \ - PR_END_MACRO - -// release objects and destroy _ptr -#define IF_DELETE(_ptr, _allocator) \ - PR_BEGIN_MACRO \ - if(_ptr) { \ - _ptr->ReleaseAll(_allocator); \ - delete(_ptr); \ - _ptr=0; \ - } \ - PR_END_MACRO - -class nsIParserNode; -class nsCParserNode; -class nsNodeAllocator; - - -#ifdef DEBUG -void DebugDumpContainmentRules(nsIDTD& theDTD,const char* aFilename,const char* aTitle); -void DebugDumpContainmentRules2(nsIDTD& theDTD,const char* aFilename,const char* aTitle); -#endif -PRUint32 AccumulateCRC(PRUint32 crc_accum, char *data_blk_ptr, int data_blk_size); - - - -/*************************************************************** - First, define the tagstack class - ***************************************************************/ - -class nsEntryStack; //forware declare to make compilers happy. 
- -struct nsTagEntry { - nsTagEntry () - : mTag(eHTMLTag_unknown), mNode(0), mParent(0), mStyles(0){} - eHTMLTags mTag; //for speedier access to tag id - nsCParserNode* mNode; - nsEntryStack* mParent; - nsEntryStack* mStyles; -}; - -class nsEntryStack { - -public: - nsEntryStack(); - ~nsEntryStack(); - - nsTagEntry* PopEntry(); - void PushEntry(nsTagEntry* aEntry, PRBool aRefCntNode = PR_TRUE); - void EnsureCapacityFor(PRInt32 aNewMax, PRInt32 aShiftOffset=0); - void Push(nsCParserNode* aNode,nsEntryStack* aStyleStack=0, PRBool aRefCntNode = PR_TRUE); - void PushFront(nsCParserNode* aNode,nsEntryStack* aStyleStack=0, PRBool aRefCntNode = PR_TRUE); - void Append(nsEntryStack *aStack); - nsCParserNode* Pop(void); - nsCParserNode* Remove(PRInt32 anIndex,eHTMLTags aTag); - nsCParserNode* NodeAt(PRInt32 anIndex) const; - eHTMLTags First() const; - eHTMLTags TagAt(PRInt32 anIndex) const; - nsTagEntry* EntryAt(PRInt32 anIndex) const; - eHTMLTags operator[](PRInt32 anIndex) const; - eHTMLTags Last() const; - void Empty(void); - - /* - * Release all objects in the entry stack - */ - void ReleaseAll(nsNodeAllocator* aNodeAllocator); - - /** - * Find the first instance of given tag on the stack. - * @update gess 12/14/99 - * @param aTag - * @return index of tag, or kNotFound if not found - */ - inline PRInt32 FirstOf(eHTMLTags aTag) const { - PRInt32 index=-1; - - if(0<mCount) { - while(++index<mCount) { - if(aTag==mEntries[index].mTag) { - return index; - } - } //while - } - return kNotFound; - } - - - /** - * Find the last instance of given tag on the stack. 
- * @update gess 12/14/99 - * @param aTag - * @return index of tag, or kNotFound if not found - */ - inline PRInt32 LastOf(eHTMLTags aTag) const { - PRInt32 index=mCount; - while(--index>=0) { - if(aTag==mEntries[index].mTag) { - return index; - } - } - return kNotFound; - } - - nsTagEntry* mEntries; - PRInt32 mCount; - PRInt32 mCapacity; -}; - - -/********************************************************** - The table state class is used to store info about each - table that is opened on the stack. As tables open and - close on the context, we update these objects to track - what has/hasn't been seen on a per table basis. - **********************************************************/ -class CTableState { -public: - CTableState(CTableState *aPreviousState=0) { - mHasCaption=PR_FALSE; - mHasCols=PR_FALSE; - mHasTHead=PR_FALSE; - mHasTFoot=PR_FALSE; - mHasTBody=PR_FALSE; - mPrevious=aPreviousState; - } - - PRBool CanOpenCaption() { - PRBool result=!(mHasCaption || mHasCols || mHasTHead || mHasTFoot || mHasTBody); - return result; - } - - PRBool CanOpenCols() { - PRBool result=!(mHasCols || mHasTHead || mHasTFoot || mHasTBody); - return result; - } - - PRBool CanOpenTBody() { - PRBool result=!(mHasTBody); - return result; - } - - PRBool CanOpenTHead() { - PRBool result=!(mHasTHead || mHasTFoot || mHasTBody); - return result; - } - - PRBool CanOpenTFoot() { - PRBool result=!(mHasTFoot || mHasTBody); - return result; - } - - PRPackedBool mHasCaption; - PRPackedBool mHasCols; - PRPackedBool mHasTHead; - PRPackedBool mHasTFoot; - PRPackedBool mHasTBody; - CTableState *mPrevious; -}; - -#ifdef DEBUG -//used for named entities and counters (XXX debug only) -class CNamedEntity { -public: - CNamedEntity(const nsAString& aName,const nsAString& aValue) : mName(), mValue() { - PRUnichar theFirst=aName.First(); - PRUnichar theLast=aName.Last(); - PRInt32 theLen=aName.Length(); - if((2<theLen) && (theFirst==theLast) && (kQuote==theFirst)) { - mName = Substring(aName, 1, theLen - 2); 
- } - else mName=aName; - - theFirst=aValue.First(); - theLast=aValue.Last(); - theLen=aValue.Length(); - if((2<theLen) && (theFirst==theLast) && (kQuote==theFirst)) { - mValue = Substring(aValue, 1, theLen - 2); - } - else mValue=aValue; - - } - - nsString mName; - nsString mValue; - PRInt32 mOrdinal; -}; -#endif -/************************************************************************ - nsTokenAllocator class implementation. - This class is used to recycle tokens. - By using this simple class, we cut WAY down on the number of tokens - that get created during the run of the system. - - Note: The allocator is created per document. It's been shared - ( but not ref. counted ) by objects, tokenizer,dtd,and dtd context, - that cease to exist when the document is destroyed. - ************************************************************************/ -class nsTokenAllocator { -public: - - nsTokenAllocator(); - virtual ~nsTokenAllocator(); - virtual CToken* CreateTokenOfType(eHTMLTokenTypes aType,eHTMLTags aTag, const nsAString& aString); - virtual CToken* CreateTokenOfType(eHTMLTokenTypes aType,eHTMLTags aTag); - - nsFixedSizeAllocator& GetArenaPool() { return mArenaPool; } - -protected: - nsFixedSizeAllocator mArenaPool; - - -#ifdef NS_DEBUG - int mTotals[eToken_last-1]; -#endif -}; - -/************************************************************************ - CNodeRecycler class implementation. - This class is used to recycle nodes. - By using this simple class, we cut down on the number of nodes - that get created during the run of the system. 
- ************************************************************************/ - -#ifndef HEAP_ALLOCATED_NODES -class nsCParserNode; -#endif - -class nsNodeAllocator { -public: - - nsNodeAllocator(); - virtual ~nsNodeAllocator(); - virtual nsCParserNode* CreateNode(CToken* aToken=nsnull, nsTokenAllocator* aTokenAllocator=0); - - nsFixedSizeAllocator& GetArenaPool() { return mNodePool; } - -#ifdef HEAP_ALLOCATED_NODES - void Recycle(nsCParserNode* aNode) { mSharedNodes.Push(NS_STATIC_CAST(void*,aNode)); } -protected: - nsDeque mSharedNodes; -#ifdef DEBUG_TRACK_NODES - PRInt32 mCount; -#endif -#endif - -protected: - nsFixedSizeAllocator mNodePool; -}; - -/************************************************************************ - The dtdcontext class defines an ordered list of tags (a context). - ************************************************************************/ - -class nsDTDContext { -public: - nsDTDContext(); - ~nsDTDContext(); - - nsTagEntry* PopEntry(); - void PushEntry(nsTagEntry* aEntry, PRBool aRefCntNode = PR_TRUE); - void MoveEntries(nsDTDContext& aDest, PRInt32 aCount); - void Push(nsCParserNode* aNode,nsEntryStack* aStyleStack=0, PRBool aRefCntNode = PR_TRUE); - nsCParserNode* Pop(nsEntryStack*& aChildStack); - nsCParserNode* Pop(); - nsCParserNode* PeekNode() { return mStack.NodeAt(mStack.mCount-1); } - eHTMLTags First(void) const; - eHTMLTags Last(void) const; - nsTagEntry* LastEntry(void) const; - eHTMLTags TagAt(PRInt32 anIndex) const; - eHTMLTags operator[](PRInt32 anIndex) const {return TagAt(anIndex);} - PRBool HasOpenContainer(eHTMLTags aTag) const; - PRInt32 FirstOf(eHTMLTags aTag) const {return mStack.FirstOf(aTag);} - PRInt32 LastOf(eHTMLTags aTag) const {return mStack.LastOf(aTag);} - - void Empty(void); - PRInt32 GetCount(void) {return mStack.mCount;} - PRInt32 GetResidualStyleCount(void) {return mResidualStyleCount;} - nsEntryStack* GetStylesAt(PRInt32 anIndex) const; - void PushStyle(nsCParserNode* aNode); - void PushStyles(nsEntryStack 
*aStyles); - nsCParserNode* PopStyle(void); - nsCParserNode* PopStyle(eHTMLTags aTag); - void RemoveStyle(eHTMLTags aTag); - - static void ReleaseGlobalObjects(void); - - void SetTokenAllocator(nsTokenAllocator* aTokenAllocator) { mTokenAllocator=aTokenAllocator; } - void SetNodeAllocator(nsNodeAllocator* aNodeAllocator) { mNodeAllocator=aNodeAllocator; } - - nsEntryStack mStack; //this will hold a list of tagentries... - PRInt32 mResidualStyleCount; - PRInt32 mContextTopIndex; - - //break this struct out separately so that lame compilers don't gack. - //By using these bits instead of bools, we have a bit-o-memory. - struct CFlags { - PRUint8 mHadBody:1; - PRUint8 mHadFrameset:1; - PRUint8 mHasOpenHead:1; - PRUint8 mTransitional:1; - }; - - union { - PRUint32 mAllBits; - CFlags mFlags; - }; - - nsTokenAllocator *mTokenAllocator; - nsNodeAllocator *mNodeAllocator; - CTableState *mTableStates; - nsDeque mEntities; - -#ifdef NS_DEBUG - enum { eMaxTags = 100 }; - eHTMLTags mXTags[eMaxTags]; - PRInt32 *mCounters; - - void ResetCounters(void); - void AllocateCounters(void); - PRInt32 IncrementCounter(eHTMLTags aTag,nsIParserNode& aNode,nsString& aResult); - - CNamedEntity* RegisterEntity(const nsAString& aName,const nsAString& aValue); - CNamedEntity* GetEntity(const nsAString& aName) const; -#endif -}; - -/************************************************************** - Now define the token deallocator class... - **************************************************************/ -class CTokenDeallocator: public nsDequeFunctor{ -protected: - nsFixedSizeAllocator& mArenaPool; - -public: - CTokenDeallocator(nsFixedSizeAllocator& aArenaPool) - : mArenaPool(aArenaPool) {} - - virtual void* operator()(void* anObject) { - CToken* aToken = (CToken*)anObject; - CToken::Destroy(aToken, mArenaPool); - return 0; - } -}; - - -/************************************************************************ - ITagHandler class offers an API for taking care of specific tokens. 
- ************************************************************************/ -class nsITagHandler { -public: - - virtual void SetString(const nsString &aTheString)=0; - virtual nsString* GetString()=0; - virtual PRBool HandleToken(CToken* aToken,nsIDTD* aDTD)=0; - virtual PRBool HandleCapturedTokens(CToken* aToken,nsIDTD* aDTD)=0; -}; - -/************************************************************************ - Here are a few useful utility methods... - ************************************************************************/ - -/** - * This method quickly scans the given set of tags, - * looking for the given tag. - * @update gess8/27/98 - * @param aTag -- tag to be search for in set - * @param aTagSet -- set of tags to be searched - * @return - */ -inline PRInt32 IndexOfTagInSet(PRInt32 aTag,const eHTMLTags* aTagSet,PRInt32 aCount) { - - const eHTMLTags* theEnd=aTagSet+aCount; - const eHTMLTags* theTag=aTagSet; - - while(theTag<theEnd) { - if(aTag==*theTag) { - return theTag-aTagSet; - } - ++theTag; - } - - return kNotFound; -} - -/** - * This method quickly scans the given set of tags, - * looking for the given tag. - * @update gess8/27/98 - * @param aTag -- tag to be search for in set - * @param aTagSet -- set of tags to be searched - * @return - */ -inline PRBool FindTagInSet(PRInt32 aTag,const eHTMLTags *aTagSet,PRInt32 aCount) { - return PRBool(-1<IndexOfTagInSet(aTag,aTagSet,aCount)); -} - -/** - * Called from various DTD's to determine the type of data in the buffer... - * @update gess 06Jun2000 - * @param aBuffer: contains a string with first block of html from source document - * @param aHasXMLFragment: tells us whether we detect XML in the buffer (based on PI) - * @return TRUE if we find HTML - */ - -// This really doesn't need to be inline! 
- -inline PRBool BufferContainsHTML(const nsString& aBuffer, - PRBool& aHasXMLFragment) -{ - PRBool result=PR_FALSE; - - aHasXMLFragment=PRBool(-1!=aBuffer.Find("<?XML",PR_TRUE,100)); - - PRInt32 theDocTypePos=aBuffer.Find("DOCTYPE",PR_TRUE,0,200); - if(-1!=theDocTypePos) { - PRInt32 theHTMLPos=aBuffer.Find("HTML",PR_TRUE,theDocTypePos+8,200); - if(-1==theHTMLPos) { - theHTMLPos=aBuffer.Find("ISO/IEC 15445",PR_TRUE,theDocTypePos+8,200); - if(-1==theHTMLPos) { - theHTMLPos=aBuffer.Find("HYPERTEXT MARKUP",PR_TRUE,theDocTypePos+8,200); - } - } - - result=PRBool(-1!=theHTMLPos); - } - else { - //worst case scenario: let's look for a few HTML tags... - PRInt32 theCount = 0; - PRInt32 theTagCount = 0; - - nsAString::const_iterator iter, end; - aBuffer.BeginReading(iter); - aBuffer.EndReading(end); - - if (Distance(iter, end) > 200) { - end = iter; - end.advance(200); - } - - for(theCount = 0; theCount < 5; ++theCount) { - if (!FindCharInReadable('<', iter, end)) { - break; - } - - // we found what may be a start tag... - - ++iter; // step over the '<' character - - nsAString::const_iterator tag_end(iter); - - aBuffer.EndReading(end); - - while (tag_end != end) { - const PRUnichar c = *tag_end; - - if (c == ' ' || c == '>' || c == '"') { - break; - } - - ++tag_end; - } - - nsHTMLTag theTag = nsHTMLTags::LookupTag(Substring(iter, tag_end)); - - if (theTag != eHTMLTag_userdefined) { - ++theTagCount; - } - - iter = tag_end; - } - - // Claim HTML if we find at least 2 real html tags... 
- result = (2 <= theTagCount); - } - - return result; -} - - -/****************************************************************************** - This little structure is used to compute CRC32 values for our debug validator - ******************************************************************************/ - -struct CRCStruct { - CRCStruct(eHTMLTags aTag,PRInt32 anOp) {mTag=aTag; mOperation=anOp;} - eHTMLTags mTag; - PRInt32 mOperation; //usually open or close -}; - -/************************************************************** - This defines the topic object used by the observer service. - The observerService uses a list of these, 1 per topic when - registering tags. - **************************************************************/ - -class nsObserverEntry : public nsIObserverEntry { -public: - NS_DECL_ISUPPORTS - nsObserverEntry(const nsAString& aString); - virtual ~nsObserverEntry(); - - NS_IMETHOD Notify(nsIParserNode* aNode, - nsIParser* aParser, - nsISupports* aWebShell, - const PRUint32 aFlags); - - nsresult AddObserver(nsIElementObserver* aObserver,eHTMLTags aTag); - void RemoveObserver(nsIElementObserver* aObserver); - PRBool Matches(const nsAString& aTopic); - -protected: - nsAutoString mTopic; // This will rarely be empty, so make it an auto string - nsVoidArray* mObservers[NS_HTML_TAG_MAX + 1]; - friend class nsMatchesTopic; -}; - -/*********************************************************************************************/ - - -struct TagList { - PRUint32 mCount; - const eHTMLTags *mTags; -}; - -/** - * Find the last member of given taglist on the given context - * @update gess 12/14/99 - * @param aContext - * @param aTagList - * @return index of tag, or kNotFound if not found - */ -inline PRInt32 LastOf(nsDTDContext& aContext, const TagList& aTagList){ - int max = aContext.GetCount(); - int index; - for(index=max-1;index>=0;index--){ - PRBool result=FindTagInSet(aContext[index],aTagList.mTags,aTagList.mCount); - if(result) { - return index; - } - } - 
return kNotFound; -} - -/** - * Find the first member of given taglist on the given context - * @update gess 12/14/99 - * @param aContext - * @param aStartOffset - * @param aTagList - * @return index of tag, or kNotFound if not found - */ -inline PRInt32 FirstOf(nsDTDContext& aContext,PRInt32 aStartOffset,TagList& aTagList){ - int max = aContext.GetCount(); - int index; - for(index=aStartOffset;index<max;++index){ - PRBool result=FindTagInSet(aContext[index],aTagList.mTags,aTagList.mCount); - if(result) { - return index; - } - } - return kNotFound; -} - - -/** - * Call this to find out whether the DTD thinks the tag requires an END tag </xxx> - * @update gess 01/04/99 - * @param id of tag - * @return TRUE of the element's end tag is optional - */ -inline PRBool HasOptionalEndTag(eHTMLTags aTag) { - static eHTMLTags gHasOptionalEndTags[]={eHTMLTag_body,eHTMLTag_colgroup,eHTMLTag_dd,eHTMLTag_dt, - eHTMLTag_head,eHTMLTag_li,eHTMLTag_option, - eHTMLTag_p,eHTMLTag_tbody,eHTMLTag_td,eHTMLTag_tfoot, - eHTMLTag_th,eHTMLTag_thead,eHTMLTag_tr, - eHTMLTag_userdefined,eHTMLTag_unknown}; - return FindTagInSet(aTag,gHasOptionalEndTags,sizeof(gHasOptionalEndTags)/sizeof(eHTMLTag_body)); -} -#endif diff --git a/htmlparser/src/nsElementTable.cpp b/htmlparser/src/nsElementTable.cpp deleted file mode 100644 index 6a998448e2ab..000000000000 --- a/htmlparser/src/nsElementTable.cpp +++ /dev/null @@ -1,2318 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - - -/** - * MODULE NOTES: - * @update gess 4/1/98 - * - */ - -#include "nsIAtom.h" -#include "nsElementTable.h" - -/***************************************************************************** - Now it's time to list all the html elements all with their capabilities... -******************************************************************************/ - -#define DECL_TAG_LIST(name_, list_) \ - static const eHTMLTags name_##list[] = list_; \ - static const TagList name_ = { NS_ARRAY_LENGTH(name_##list), name_##list }; - -#define COMMA , - -//First, define the set of taglists for tags with special parents... 
-DECL_TAG_LIST(gAParents,{eHTMLTag_map}) -DECL_TAG_LIST(gInAddress,{eHTMLTag_address}) -DECL_TAG_LIST(gInHead,{eHTMLTag_head}) -DECL_TAG_LIST(gInTable,{eHTMLTag_table}) -DECL_TAG_LIST(gInHTML,{eHTMLTag_html}) -DECL_TAG_LIST(gInBody,{eHTMLTag_body}) -DECL_TAG_LIST(gInForm,{eHTMLTag_form}) -DECL_TAG_LIST(gInFieldset,{eHTMLTag_fieldset}) -DECL_TAG_LIST(gInTR,{eHTMLTag_tr}) -DECL_TAG_LIST(gInDL,{eHTMLTag_dl COMMA eHTMLTag_body}) -DECL_TAG_LIST(gInFrameset,{eHTMLTag_frameset}) -DECL_TAG_LIST(gInNoframes,{eHTMLTag_noframes}) -//Removed ADDRESS to solve 24885 -// gInP: nsHTMLElement::CanContain() also allows table in Quirks mode for bug 43678, removed FORM bug 94269 -DECL_TAG_LIST(gInP,{eHTMLTag_span}) -DECL_TAG_LIST(gOptgroupParents,{eHTMLTag_select COMMA eHTMLTag_optgroup}) -DECL_TAG_LIST(gBodyParents,{eHTMLTag_html COMMA eHTMLTag_noframes}) -DECL_TAG_LIST(gColParents,{eHTMLTag_table COMMA eHTMLTag_colgroup}) -DECL_TAG_LIST(gFramesetParents,{eHTMLTag_html COMMA eHTMLTag_frameset}) -DECL_TAG_LIST(gLegendParents,{eHTMLTag_fieldset}) -DECL_TAG_LIST(gAreaParent,{eHTMLTag_map}) -DECL_TAG_LIST(gParamParents,{eHTMLTag_applet COMMA eHTMLTag_object}) -DECL_TAG_LIST(gTRParents,{eHTMLTag_tbody COMMA eHTMLTag_tfoot COMMA eHTMLTag_thead COMMA eHTMLTag_table}) -DECL_TAG_LIST(gTREndParents,{eHTMLTag_tbody COMMA eHTMLTag_tfoot COMMA eHTMLTag_thead COMMA eHTMLTag_table COMMA eHTMLTag_applet}) - -//********************************************************************************************* -// Next, define the set of taglists for tags with special kids... 
-//********************************************************************************************* - -DECL_TAG_LIST(gContainsText,{eHTMLTag_text COMMA eHTMLTag_newline COMMA eHTMLTag_whitespace COMMA eHTMLTag_entity}) -DECL_TAG_LIST(gUnknownKids,{eHTMLTag_html COMMA eHTMLTag_frameset}) -DECL_TAG_LIST(gContainsOpts,{eHTMLTag_option COMMA eHTMLTag_optgroup COMMA eHTMLTag_script}) -DECL_TAG_LIST(gContainsParam,{eHTMLTag_param}) -DECL_TAG_LIST(gColgroupKids,{eHTMLTag_col}) -DECL_TAG_LIST(gAddressKids,{eHTMLTag_p}) -DECL_TAG_LIST(gBodyKids,{eHTMLTag_dd COMMA eHTMLTag_del COMMA eHTMLTag_dt COMMA eHTMLTag_ins COMMA eHTMLTag_noscript COMMA eHTMLTag_script COMMA eHTMLTag_li COMMA eHTMLTag_param}) // Added PARAM for bug 54448 -DECL_TAG_LIST(gButtonKids,{eHTMLTag_caption COMMA eHTMLTag_legend}) - -DECL_TAG_LIST(gDLRootTags,{eHTMLTag_body COMMA eHTMLTag_td COMMA eHTMLTag_table COMMA eHTMLTag_applet COMMA eHTMLTag_dd}) -DECL_TAG_LIST(gDLKids,{eHTMLTag_dd COMMA eHTMLTag_dt}) -DECL_TAG_LIST(gDTKids,{eHTMLTag_dt}) -DECL_TAG_LIST(gFieldsetKids,{eHTMLTag_legend COMMA eHTMLTag_text}) -DECL_TAG_LIST(gFontKids,{eHTMLTag_legend COMMA eHTMLTag_table COMMA eHTMLTag_text}) // Added table to fix bug 93365 -DECL_TAG_LIST(gFormKids,{eHTMLTag_keygen}) -DECL_TAG_LIST(gFramesetKids,{eHTMLTag_frame COMMA eHTMLTag_frameset COMMA eHTMLTag_noframes}) - -DECL_TAG_LIST(gHtmlKids,{eHTMLTag_body COMMA eHTMLTag_frameset COMMA eHTMLTag_head COMMA eHTMLTag_map COMMA eHTMLTag_noscript COMMA eHTMLTag_noframes COMMA eHTMLTag_script COMMA eHTMLTag_newline COMMA eHTMLTag_whitespace}) -DECL_TAG_LIST(gHeadKids,{eHTMLTag_base COMMA eHTMLTag_bgsound COMMA eHTMLTag_link COMMA eHTMLTag_meta COMMA eHTMLTag_script COMMA eHTMLTag_style COMMA eHTMLTag_title COMMA eHTMLTag_noembed}) - -DECL_TAG_LIST(gLabelKids,{eHTMLTag_span}) -DECL_TAG_LIST(gLIKids,{eHTMLTag_ol COMMA eHTMLTag_ul}) -DECL_TAG_LIST(gMapKids,{eHTMLTag_area}) -DECL_TAG_LIST(gPreKids,{eHTMLTag_hr COMMA eHTMLTag_center}) //note that CENTER is here for backward 
compatibility; it's not 4.0 spec. - -DECL_TAG_LIST(gTableKids,{eHTMLTag_caption COMMA eHTMLTag_col COMMA eHTMLTag_colgroup COMMA eHTMLTag_form COMMA eHTMLTag_thead COMMA eHTMLTag_tbody COMMA eHTMLTag_tfoot COMMA eHTMLTag_map COMMA eHTMLTag_script})// Removed INPUT - Ref. Bug 20087, 25382 - -DECL_TAG_LIST(gTableElemKids,{eHTMLTag_form COMMA eHTMLTag_map COMMA eHTMLTag_noscript COMMA eHTMLTag_script COMMA eHTMLTag_td COMMA eHTMLTag_th COMMA eHTMLTag_tr}) -DECL_TAG_LIST(gTRKids,{eHTMLTag_td COMMA eHTMLTag_th COMMA eHTMLTag_form COMMA eHTMLTag_script})// Removed INPUT - Ref. Bug 20087, 25382 | Removed MAP to fix 58942 -DECL_TAG_LIST(gTBodyKids,{eHTMLTag_tr COMMA eHTMLTag_form}) // Removed INPUT - Ref. Bug 20087, 25382 -DECL_TAG_LIST(gULKids,{eHTMLTag_li COMMA eHTMLTag_p}) - - -//********************************************************************************************* -// The following tag lists are used to define common set of root notes for the HTML elements... -//********************************************************************************************* - -DECL_TAG_LIST(gRootTags,{eHTMLTag_body COMMA eHTMLTag_td COMMA eHTMLTag_table COMMA eHTMLTag_applet COMMA eHTMLTag_select}) // Added SELECT to fix bug 98645 -DECL_TAG_LIST(gTableRootTags,{eHTMLTag_applet COMMA eHTMLTag_body COMMA eHTMLTag_dl COMMA eHTMLTag_ol COMMA eHTMLTag_td COMMA eHTMLTag_th}) -DECL_TAG_LIST(gHTMLRootTags,{eHTMLTag_unknown}) - -DECL_TAG_LIST(gLIRootTags,{eHTMLTag_ul COMMA eHTMLTag_ol COMMA eHTMLTag_dir COMMA eHTMLTag_menu COMMA eHTMLTag_p COMMA eHTMLTag_body COMMA eHTMLTag_td COMMA eHTMLTag_th}) - -DECL_TAG_LIST(gOLRootTags,{eHTMLTag_body COMMA eHTMLTag_li COMMA eHTMLTag_td COMMA eHTMLTag_th COMMA eHTMLTag_select}) -DECL_TAG_LIST(gTDRootTags,{eHTMLTag_tr COMMA eHTMLTag_tbody COMMA eHTMLTag_thead COMMA eHTMLTag_tfoot COMMA eHTMLTag_table COMMA eHTMLTag_applet}) -DECL_TAG_LIST(gNoframeRoot,{eHTMLTag_body COMMA eHTMLTag_frameset}) - 
-//********************************************************************************************* -// The following tag lists are used to define the autoclose properties of the html elements... -//********************************************************************************************* - -DECL_TAG_LIST(gBodyAutoClose,{eHTMLTag_head}) -DECL_TAG_LIST(gTBodyAutoClose,{eHTMLTag_thead COMMA eHTMLTag_tfoot COMMA eHTMLTag_tbody COMMA eHTMLTag_td COMMA eHTMLTag_th}) // TD|TH inclusion - Bug# 24112 -DECL_TAG_LIST(gCaptionAutoClose,{eHTMLTag_tbody}) -DECL_TAG_LIST(gLIAutoClose,{eHTMLTag_p COMMA eHTMLTag_li}) -DECL_TAG_LIST(gPAutoClose,{eHTMLTag_p COMMA eHTMLTag_li}) -DECL_TAG_LIST(gHRAutoClose,{eHTMLTag_p}) -DECL_TAG_LIST(gOLAutoClose,{eHTMLTag_p COMMA eHTMLTag_ol}) -DECL_TAG_LIST(gDivAutoClose,{eHTMLTag_p}) - -DECL_TAG_LIST(gHeadingTags,{eHTMLTag_h1 COMMA eHTMLTag_h2 COMMA eHTMLTag_h3 COMMA eHTMLTag_h4 COMMA eHTMLTag_h5 COMMA eHTMLTag_h6}) - -DECL_TAG_LIST(gTableCloseTags,{eHTMLTag_td COMMA eHTMLTag_tr COMMA eHTMLTag_th COMMA eHTMLTag_tbody COMMA eHTMLTag_thead COMMA eHTMLTag_tfoot}) -DECL_TAG_LIST(gTRCloseTags,{eHTMLTag_tr COMMA eHTMLTag_td COMMA eHTMLTag_th}) -DECL_TAG_LIST(gTDCloseTags,{eHTMLTag_td COMMA eHTMLTag_th}) -DECL_TAG_LIST(gDTCloseTags,{eHTMLTag_p COMMA eHTMLTag_dd COMMA eHTMLTag_dt}) -DECL_TAG_LIST(gULCloseTags,{eHTMLTag_li}) -DECL_TAG_LIST(gULAutoClose,{eHTMLTag_p COMMA eHTMLTag_ul}) //fix bug 50261.. - - -DECL_TAG_LIST(gExcludableParents,{eHTMLTag_pre}) // Ref Bug 22913 -DECL_TAG_LIST(gCaptionExcludableParents,{eHTMLTag_td}) //Ref Bug 26488 - -//********************************************************************************************* -//Lastly, bind tags with their rules, their special parents and special kids. 
-//********************************************************************************************* - - -const int kNoPropRange=0; -const int kDefaultPropRange=1; -const int kBodyPropRange=2; - -PRBool CanBeContainedLI(eHTMLTags aChildTag,nsDTDContext &aContext); - -//********************************************************************************************* -// -// Now let's dynamically build the element table... -// -//********************************************************************************************* -nsHTMLElement* gHTMLElements=0; - - -void Initialize(eHTMLTags aTag, - eHTMLTags aRequiredAncestor, - eHTMLTags aExcludingAncestor, - const TagList* aRootNodes, - const TagList* aEndRootNodes, - const TagList* aAutocloseStart, - const TagList* aAutocloseEnd, - const TagList* aSynonymousTags, - const TagList* aExcludableParents, - int aParentBits, - int aInclusionBits, - int aExclusionBits, - int aSpecialProperties, - PRUint32 aPropagateRange, - const TagList* aSpecialParents, - const TagList* aSpecialKids, - eHTMLTags aSkipTarget - ) -{ - gHTMLElements[aTag].mTagID=aTag; - gHTMLElements[aTag].mRequiredAncestor=aRequiredAncestor; - gHTMLElements[aTag].mExcludingAncestor=aExcludingAncestor; - gHTMLElements[aTag].mRootNodes=aRootNodes; - gHTMLElements[aTag].mEndRootNodes=aEndRootNodes; - gHTMLElements[aTag].mAutocloseStart=aAutocloseStart; - gHTMLElements[aTag].mAutocloseEnd=aAutocloseEnd; - gHTMLElements[aTag].mSynonymousTags=aSynonymousTags; - gHTMLElements[aTag].mExcludableParents=aExcludableParents; - gHTMLElements[aTag].mParentBits=aParentBits; - gHTMLElements[aTag].mInclusionBits=aInclusionBits; - gHTMLElements[aTag].mExclusionBits=aExclusionBits; - gHTMLElements[aTag].mSpecialProperties=aSpecialProperties; - gHTMLElements[aTag].mPropagateRange=aPropagateRange; - gHTMLElements[aTag].mSpecialParents=aSpecialParents; - gHTMLElements[aTag].mSpecialKids=aSpecialKids; - gHTMLElements[aTag].mSkipTarget=aSkipTarget; - gHTMLElements[aTag].mCanBeContained=0; 
//most use the default impl. -} - - -void InitializeElementTable(void) { - if(!gHTMLElements) { - gHTMLElements=new nsHTMLElement[eHTMLTag_userdefined+5]; - - Initialize( - /*tag*/ eHTMLTag_unknown, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kNone, kNone, kNone, - /*special props, prop-range*/ kNonContainer, 10, - /*special parents,kids,skip*/ 0,&gUnknownKids,eHTMLTag_unknown); - - /************************************************* - Note: I changed A to contain flow elements - since it's such a popular (but illegal) - idiom. - *************************************************/ - - Initialize( - /*tag*/ eHTMLTag_a, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kSpecial, kInlineEntity, kNone, - /*special props, prop-range*/ kVerifyHierarchy,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_abbr, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kPhrase, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_acronym, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kPhrase, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_address, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - 
/*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kBlock, kInlineEntity, kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,&gAddressKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_applet, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kSpecial, (kSelf|kInlineEntity|kFlowEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,&gContainsParam,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_area, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gAreaParent,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kNone, kInlineEntity, kSelf, - /*special props, prop-range*/ kNonContainer,kDefaultPropRange, - /*special parents,kids,skip*/ &gAreaParent,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_b, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFontStyle, (kInlineEntity|kSelf), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_base, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gInHead, &gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kNone, kNone, kNone, - /*special props, prop-range*/ kNonContainer, kNoPropRange, - /*special parents,kids,skip*/ &gInHead,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_basefont, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - 
/*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kSpecial, kInlineEntity, kNone, - /*special props, prop-range*/ kNonContainer, kNoPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_bdo, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kSpecial, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_bgsound, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kBlock, kNone, kNone, - /*special props, prop-range*/ 0,kNoPropRange, - /*special parents,kids,skip*/ &gInHead,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_big, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFontStyle, (kInlineEntity|kSelf), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_blink, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFontStyle, (kFlowEntity|kSelf), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_blockquote, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, //remove excludeable parents to fix bug 53473 - 
/*parent,incl,exclgroups*/ kBlock, (kSelf|kFlowEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_body, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_frameset, - /*rootnodes,endrootnodes*/ &gInHTML, &gInHTML, - /*autoclose starttags and endtags*/ &gBodyAutoClose,0,0,0, - /*parent,incl,exclgroups*/ kHTMLContent,(kFlowEntity|kSelf), kNone, - /*special props, prop-range*/ kOmitEndTag, kBodyPropRange, - /*special parents,kids,skip*/ 0,&gBodyKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_br, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kSpecial, kNone, kNone, - /*special props, prop-range*/ kNonContainer, kNoPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_button, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFormControl, kFlowEntity, kFormControl, - /*special props, prop-range*/ kRequiresBody,kDefaultPropRange, - /*special parents,kids,skip*/ 0,&gButtonKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_caption, - /*req-parent excl-parent*/ eHTMLTag_table,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gInTable,&gInTable, - /*autoclose starttags and endtags*/ &gCaptionAutoClose,0,0,0, - /*parent,incl,exclgroups*/ kNone, kFlowEntity, kSelf, - /*special props, prop-range*/ (kNoPropagate|kNoStyleLeaksOut),kDefaultPropRange, - /*special parents,kids,skip*/ &gInTable,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_center, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - 
/*parent,incl,exclgroups*/ kBlock, (kSelf|kFlowEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_cite, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kPhrase, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_code, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kPhrase, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_col, - /*req-parent excl-parent*/ eHTMLTag_table,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gColParents,&gColParents, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kNone, kNone, kNone, - /*special props, prop-range*/ kNoPropagate|kNonContainer,kDefaultPropRange, - /*special parents,kids,skip*/ &gColParents,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_colgroup, - /*req-parent excl-parent*/ eHTMLTag_table,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gInTable,&gInTable, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kNone, kNone, kNone, - /*special props, prop-range*/ kNoPropagate,kDefaultPropRange, - /*special parents,kids,skip*/ &gInTable,&gColgroupKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_counter, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kPhrase, (kFlowEntity|kSelf), kNone, - 
/*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_dd, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags, &gRootTags, - /*autoclose starttags and endtags*/ &gDTCloseTags,0,&gDLKids,0, - /*parent,incl,exclgroups*/ kInlineEntity, kFlowEntity, kNone, - /*special props, prop-range*/ kNoPropagate|kMustCloseSelf|kVerifyHierarchy,kDefaultPropRange, - /*special parents,kids,skip*/ &gInDL,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_del, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFlowEntity, (kSelf|kFlowEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ &gInBody,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_dfn, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kPhrase, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_dir, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gOLRootTags,&gOLRootTags, - /*autoclose starttags and endtags*/ &gOLAutoClose, &gULCloseTags, 0,0, - /*parent,incl,exclgroups*/ kList, (kFlowEntity|kSelf), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,&gULKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_div, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ &gDivAutoClose,0,0,0, - /*parent,incl,exclgroups*/ kBlock, 
(kSelf|kFlowEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_dl, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gDLRootTags,&gRootTags, //fix bug 57634 - /*autoclose starttags and endtags*/ 0,0,0,&gDTKids, // DT should not contain DL - bug 100466 - /*parent,incl,exclgroups*/ kBlock, kSelf|kFlowEntity, kNone, - /*special props, prop-range*/ 0, kNoPropRange, - /*special parents,kids,skip*/ 0,&gDLKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_dt, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags, &gRootTags, - /*autoclose starttags and endtags*/ &gDTCloseTags,0,&gDLKids,0, - /*parent,incl,exclgroups*/ kInlineEntity, (kFlowEntity-kHeading), kNone, // dt's parent group is inline - bug 65467 - /*special props, prop-range*/ (kNoPropagate|kMustCloseSelf|kVerifyHierarchy),kDefaultPropRange, - /*special parents,kids,skip*/ &gInDL,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_em, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kPhrase, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_embed, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kBlockEntity, kNone, kNone, - /*special props, prop-range*/ kNonContainer,kDefaultPropRange, - /*special parents,kids,skip*/ 0,&gContainsParam,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_endnote, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ 
&gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFlowEntity, kFlowEntity, kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_fieldset, - /*requiredAncestor*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kBlock, (kSelf|kFlowEntity), kNone, - /*special props, prop-range*/ kNoPropagate,kDefaultPropRange, - /*special parents,kids,skip*/ 0,&gFieldsetKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_font, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFontStyle, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,&gFontKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_form, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kBlock, kFlowEntity, kNone, - /*special props, prop-range*/ kNoStyleLeaksIn, kNoPropRange, - /*special parents,kids,skip*/ 0,&gFormKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_frame, - /*req-parent excl-parent*/ eHTMLTag_frameset,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gInFrameset,&gInFrameset, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kNone, kNone, kNone, - /*special props, prop-range*/ kNoPropagate|kNoStyleLeaksIn|kNonContainer, kNoPropRange, - /*special parents,kids,skip*/ &gInFrameset,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_frameset, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_body, - /*rootnodes,endrootnodes*/ &gFramesetParents,&gInHTML, - 
/*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kHTMLContent, kSelf, kAllTags, - /*special props, prop-range*/ kNoPropagate|kNoStyleLeaksIn, kNoPropRange, - /*special parents,kids,skip*/ &gInHTML,&gFramesetKids,eHTMLTag_unknown); - - - Initialize( - /*tag*/ eHTMLTag_h1, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ &gHeadingTags, &gHeadingTags, &gHeadingTags,0, - /*parent,incl,exclgroups*/ kHeading, kFlowEntity, kNone, - /*special props, prop-range*/ kVerifyHierarchy,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_h2, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ &gHeadingTags, &gHeadingTags, &gHeadingTags,0, - /*parent,incl,exclgroups*/ kHeading, kFlowEntity, kNone, - /*special props, prop-range*/ kVerifyHierarchy,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_h3, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ &gHeadingTags, &gHeadingTags, &gHeadingTags,0, - /*parent,incl,exclgroups*/ kHeading, kFlowEntity, kNone, - /*special props, prop-range*/ kVerifyHierarchy,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_h4, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ &gHeadingTags, &gHeadingTags, &gHeadingTags,0, - /*parent,incl,exclgroups*/ kHeading, kFlowEntity, kNone, - /*special props, prop-range*/ kVerifyHierarchy,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_h5, - /*req-parent 
excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ &gHeadingTags, &gHeadingTags, &gHeadingTags,0, - /*parent,incl,exclgroups*/ kHeading, kFlowEntity, kNone, - /*special props, prop-range*/ kVerifyHierarchy,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_h6, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ &gHeadingTags, &gHeadingTags, &gHeadingTags,0, - /*parent,incl,exclgroups*/ kHeading, kFlowEntity, kNone, - /*special props, prop-range*/ kVerifyHierarchy,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_head, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gInHTML, &gInHTML, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kHTMLContent, (kHeadContent|kHeadMisc), kNone, - /*special props, prop-range*/ kNoStyleLeaksIn, kDefaultPropRange, - /*special parents,kids,skip*/ &gInHTML,&gHeadKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_hr, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ &gHRAutoClose,0,0,0, - /*parent,incl,exclgroups*/ kBlock, kNone, kNone, - /*special props, prop-range*/ kNonContainer,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_html, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_html, - /*rootnodes,endrootnodes*/ &gHTMLRootTags, &gHTMLRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kNone, kHTMLContent, kNone, - /*special props, prop-range*/ kSaveMisplaced|kOmitEndTag|kNoStyleLeaksIn, kDefaultPropRange, - /*special parents,kids,skip*/ 
0,&gHtmlKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_i, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFontStyle, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0, kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_iframe, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kSpecial, (kSelf|kFlowEntity), kNone, - /*special props, prop-range*/ kNoStyleLeaksIn, kNoPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_image, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kSpecial, kNone, kNone, - /*special props, prop-range*/ kNonContainer,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_img, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kSpecial, kNone, kNone, - /*special props, prop-range*/ kNonContainer,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_input, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFormControl, kNone, kNone, - /*special props, prop-range*/ kNonContainer|kRequiresBody,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_ins, - 
/*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFlowEntity, (kSelf|kFlowEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_isindex, - /*requiredAncestor*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ (kBlock|kHeadContent), kFlowEntity, kNone, - /*special props, prop-range*/ kNonContainer,kDefaultPropRange, - /*special parents,kids,skip*/ &gInBody,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_kbd, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kPhrase, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_keygen, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFlowEntity, kNone, kNone, - /*special props, prop-range*/ kNonContainer,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_label, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFormControl, kInlineEntity, kSelf, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,&gLabelKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_legend, - /*requiredAncestor*/ eHTMLTag_fieldset,eHTMLTag_unknown, - 
/*rootnodes,endrootnodes*/ &gInFieldset,&gInFieldset, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kNone, kInlineEntity, kNone, - /*special props, prop-range*/ kRequiresBody,kDefaultPropRange, - /*special parents,kids,skip*/ &gInFieldset,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_li, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gLIRootTags,&gLIRootTags, - /*autoclose starttags and endtags*/ &gLIAutoClose,0,0,0, - /*parent,incl,exclgroups*/ kFlowEntity, kFlowEntity, kSelf, // For compat. sake LI's parent model should be flow - Ref. bug 96031 - /*special props, prop-range*/ kNoPropagate|kVerifyHierarchy, kDefaultPropRange, - /*special parents,kids,skip*/ 0,&gLIKids,eHTMLTag_unknown); - - gHTMLElements[eHTMLTag_li].mCanBeContained=&CanBeContainedLI; - - Initialize( - /*tag*/ eHTMLTag_link, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gInHead,&gInHead, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kHeadMisc, kNone, kNone, - /*special props, prop-range*/ kNonContainer,kDefaultPropRange, - /*special parents,kids,skip*/ &gInHead,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_listing, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kPreformatted, (kSelf|kFlowEntity), kNone, //add flowentity to fix 54993 - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_map, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kSpecial, kInlineEntity|kBlockEntity, kNone, - /*special props, prop-range*/ 0, kDefaultPropRange, - /*special 
parents,kids,skip*/ 0,&gMapKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_marquee, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kBlock, kFlowEntity, kNone, - /*special props, prop-range*/ 0, kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_menu, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kList, (kSelf|kFlowEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,&gULKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_meta, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gInHead, &gInHead, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kHeadMisc, kNone, kNone, - /*special props, prop-range*/ kNoStyleLeaksIn|kNonContainer, kDefaultPropRange, - /*special parents,kids,skip*/ &gInHead,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_multicol, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kExtensions, kNone, kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_nobr, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kInlineEntity, (kFlowEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_noembed, 
- /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kNone, kNone, kNone, - /*special props, prop-range*/ kDiscardTag, kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_noembed); - - Initialize( - /*tag*/ eHTMLTag_noframes, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gNoframeRoot,&gNoframeRoot, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kBlock, kFlowEntity, kNone, - /*special props, prop-range*/ 0, kNoPropRange, - /*special parents,kids,skip*/ &gNoframeRoot,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_noscript, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kBlock, kFlowEntity|kSelf, kNone, - /*special props, prop-range*/ kLegalOpen, kNoPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_object, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ (kHeadMisc|kSpecial), (kFlowEntity|kInlineEntity|kSelf), kNone, - /*special props, prop-range*/ kNoStyleLeaksOut,kDefaultPropRange, - /*special parents,kids,skip*/ 0,&gContainsParam,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_ol, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gOLRootTags,&gOLRootTags, - /*autoclose starttags and endtags*/ &gOLAutoClose, &gULCloseTags, 0,0, - /*parent,incl,exclgroups*/ kList, (kFlowEntity|kSelf), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,&gULKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_optgroup, - 
/*requiredAncestor*/ eHTMLTag_select,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gOptgroupParents,&gOptgroupParents, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kNone, kNone, kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ &gOptgroupParents,&gContainsOpts,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_option, - /*requiredAncestor*/ eHTMLTag_select,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gOptgroupParents,&gOptgroupParents, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kNone, kPCDATA, kFlowEntity, - /*special props, prop-range*/ kNoStyleLeaksIn|kNoPropagate, kDefaultPropRange, - /*special parents,kids,skip*/ &gOptgroupParents,&gContainsText,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_p, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kBlock, kInlineEntity, kNone, //this used to contain FLOW. But it's really an inline container. 
- /*special props, prop-range*/ kHandleStrayTag,kDefaultPropRange, //otherwise it tries to contain things like H1..H6 - /*special parents,kids,skip*/ 0,&gInP,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_param, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gParamParents, &gParamParents, - /*autoclose starttags and endtags*/ &gPAutoClose,0,0,0, - /*parent,incl,exclgroups*/ kNone, kNone, kNone, - /*special props, prop-range*/ kNonContainer, kNoPropRange, - /*special parents,kids,skip*/ &gParamParents,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_parsererror, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ &gDivAutoClose,0,0,0, - /*parent,incl,exclgroups*/ kNone, (kSelf|kFlowEntity), kNone, - /*special props, prop-range*/ 0, kNoPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_plaintext, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kExtensions, kFlowEntity, kNone, - /*special props, prop-range*/ kNone,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_plaintext); // Bug 56914 - - Initialize( - /*tag*/ eHTMLTag_pre, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kBlock|kPreformatted, (kSelf|kFlowEntity), kNone, // Note: PRE is a block level element - bug 80009 - /*special props, prop-range*/ 0, kDefaultPropRange, - /*special parents,kids,skip*/ 0,&gPreKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_q, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags 
and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kSpecial, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_s, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFontStyle, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_samp, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kPhrase, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_script, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ (kSpecial|kHeadMisc), kCDATA, kNone, - /*special props, prop-range*/ kNoStyleLeaksIn|kLegalOpen, kNoPropRange, - /*special parents,kids,skip*/ 0,&gContainsText,eHTMLTag_script); - - Initialize( - /*tag*/ eHTMLTag_select, - /*requiredAncestor*/ eHTMLTag_unknown, eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gInForm,&gInForm, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFormControl, kNone, kFlowEntity|kDLChild, - /*special props, prop-range*/ kNoPropagate|kNoStyleLeaksIn, kDefaultPropRange, - /*special parents,kids,skip*/ &gInForm,&gContainsOpts,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_server, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 
0,0,0,0, - /*parent,incl,exclgroups*/ (kSpecial|kHeadMisc), kCDATA, kNone, - /*special props, prop-range*/ (kNoStyleLeaksIn|kLegalOpen), kNoPropRange, - /*special parents,kids,skip*/ 0,&gContainsText,eHTMLTag_server); - - Initialize( - /*tag*/ eHTMLTag_small, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFontStyle, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - - /*tag*/ eHTMLTag_sound, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ (kFlowEntity|kHeadContent), kNone, kNone, // Added kFlowEntity|kHeadContent & kNonContainer in - /*special props, prop-range*/ kNonContainer,kDefaultPropRange, // Ref. to Bug 25749 - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_sourcetext, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ &gDivAutoClose,0,0,0, - /*parent,incl,exclgroups*/ kNone, (kSelf|kFlowEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - - /*tag*/ eHTMLTag_spacer, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kExtensions, kNone, kNone, - /*special props, prop-range*/ kNonContainer,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - - // I made span a special% tag again, (instead of inline). 
- // This fixes the case: <font color="blue"><p><span>text</span> - - /*tag*/ eHTMLTag_span, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kSpecial, (kInlineEntity|kSelf|kFlowEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - - /*tag*/ eHTMLTag_strike, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFontStyle, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - - /*tag*/ eHTMLTag_strong, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kPhrase, (kSelf|kInlineEntity), kNone, //changed this to inline per spec; fix bug 44584. 
- /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,&gContainsText,eHTMLTag_unknown); - - Initialize( - - /*tag*/ eHTMLTag_style, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gInHead, &gInHead, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kHeadMisc, kCDATA, kNone, - /*special props, prop-range*/ kNoStyleLeaksIn|kNonContainer, kNoPropRange, - /*special parents,kids,skip*/ &gInHead,0,eHTMLTag_style); - - Initialize( - /*tag*/ eHTMLTag_sub, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kSpecial, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - - /*tag*/ eHTMLTag_sup, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kSpecial, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_table, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gTableRootTags,&gTableRootTags, - /*autoclose starttags and endtags*/ 0,&gTableCloseTags,0,0, - /*parent,incl,exclgroups*/ kBlock, kNone, (kSelf|kInlineEntity), - /*special props, prop-range*/ (kBadContentWatch|kNoStyleLeaksIn), 2, - /*special parents,kids,skip*/ 0,&gTableKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_tbody, - /*requiredAncestor*/ eHTMLTag_table, eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gInTable, &gInTable, - /*autoclose starttags and endtags*/ &gTBodyAutoClose,0,0,0, - /*parent,incl,exclgroups*/ kNone, kNone, (kSelf|kInlineEntity), - 
/*special props, prop-range*/ (kNoPropagate|kBadContentWatch|kNoStyleLeaksIn|kNoStyleLeaksOut), kDefaultPropRange, - /*special parents,kids,skip*/ &gInTable,&gTBodyKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_td, - /*requiredAncestor*/ eHTMLTag_table, eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gTDRootTags,&gTDRootTags, - /*autoclose starttags and endtags*/ &gTDCloseTags,&gTDCloseTags,0,&gExcludableParents, - /*parent,incl,exclgroups*/ kNone, kFlowEntity, kSelf, - /*special props, prop-range*/ kNoStyleLeaksIn|kNoStyleLeaksOut, kDefaultPropRange, - /*special parents,kids,skip*/ &gTDRootTags,&gBodyKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_textarea, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gInForm, &gInForm, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFormControl, kPCDATA, kNone, - /*special props, prop-range*/ kRequiresBody,kDefaultPropRange, - /*special parents,kids,skip*/ &gInForm,&gContainsText,eHTMLTag_textarea); - - Initialize( - /*tag*/ eHTMLTag_tfoot, - /*requiredAncestor*/ eHTMLTag_table, eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gInTable, &gInTable, - /*autoclose starttags and endtags*/ &gTBodyAutoClose,0,0,0, - /*parent,incl,exclgroups*/ kNone, kNone, kSelf, - /*special props, prop-range*/ (kNoPropagate|kBadContentWatch|kNoStyleLeaksIn|kNoStyleLeaksOut), kNoPropRange, - /*special parents,kids,skip*/ &gInTable,&gTableElemKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_th, - /*requiredAncestor*/ eHTMLTag_table, eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gTDRootTags,&gTDRootTags, - /*autoclose starttags and endtags*/ &gTDCloseTags,&gTDCloseTags,0,0, - /*parent,incl,exclgroups*/ kNone, kFlowEntity, kSelf, - /*special props, prop-range*/ (kNoStyleLeaksIn|kNoStyleLeaksOut), kDefaultPropRange, - /*special parents,kids,skip*/ &gTDRootTags,&gBodyKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_thead, - /*req-parent 
excl-parent*/ eHTMLTag_table,eHTMLTag_unknown, //fix bug 54840... - /*rootnodes,endrootnodes*/ &gInTable,&gInTable, - /*autoclose starttags and endtags*/ &gTBodyAutoClose,0,0,0, - /*parent,incl,exclgroups*/ kNone, kNone, kSelf, - /*special props, prop-range*/ (kNoPropagate|kBadContentWatch|kNoStyleLeaksIn|kNoStyleLeaksOut), kNoPropRange, - /*special parents,kids,skip*/ &gInTable,&gTableElemKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_title, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gInHead,&gInHead, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kHeadMisc,kPCDATA, kNone, - /*special props, prop-range*/ kNoStyleLeaksIn, kNoPropRange, - /*special parents,kids,skip*/ &gInHead,&gContainsText,eHTMLTag_title); - - Initialize( - /*tag*/ eHTMLTag_tr, - /*requiredAncestor*/ eHTMLTag_table, eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gTRParents,&gTREndParents, - /*autoclose starttags and endtags*/ &gTRCloseTags,0,0,0, - /*parent,incl,exclgroups*/ kNone, kNone, kInlineEntity, - /*special props, prop-range*/ (kBadContentWatch|kNoStyleLeaksIn|kNoStyleLeaksOut), kNoPropRange, - /*special parents,kids,skip*/ &gTRParents,&gTRKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_tt, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFontStyle, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_u, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFontStyle, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 
0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_ul, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gOLRootTags,&gOLRootTags, - /*autoclose starttags and endtags*/ &gULAutoClose,&gULCloseTags,0,0, - /*parent,incl,exclgroups*/ kList, (kFlowEntity|kSelf), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,&gULKids,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_var, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kPhrase, (kSelf|kInlineEntity), kNone, - /*special props, prop-range*/ 0,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_wbr, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kExtensions, kNone, kNone, - /*special props, prop-range*/ kNonContainer,kNoPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_xmp, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kInlineEntity|kPreformatted, kNone, kNone, - /*special props, prop-range*/ kNone,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_xmp); - - Initialize( - /*tag*/ eHTMLTag_text, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gInBody,&gInBody, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFlowEntity, kNone, kNone, - /*special props, prop-range*/ kNonContainer|kRequiresBody,kNoPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_whitespace, 
- /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gInBody,&gInBody, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFlowEntity|kHeadMisc, kNone, kNone, - /*special props, prop-range*/ kNonContainer|kLegalOpen,kNoPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_newline, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gInBody,&gInBody, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFlowEntity|kHeadMisc, kNone, kNone, - /*special props, prop-range*/ kNonContainer|kLegalOpen, kNoPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_comment, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFlowEntity, kNone, kNone, - /*special props, prop-range*/ kOmitEndTag|kLegalOpen,kNoPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_entity, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gInBody,&gInBody, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFlowEntity, kNone, kNone, - /*special props, prop-range*/ 0, kNoPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_doctypeDecl, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFlowEntity, kNone, kNone, - /*special props, prop-range*/ kOmitEndTag,kNoPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_markupDecl, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - 
/*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFlowEntity, kNone, kNone, - /*special props, prop-range*/ kOmitEndTag,kNoPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_instruction, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, - /*rootnodes,endrootnodes*/ 0,0, - /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kFlowEntity, kNone, kNone, - /*special props, prop-range*/ kOmitEndTag,kNoPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); - - Initialize( - /*tag*/ eHTMLTag_userdefined, - /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_frameset, - /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, - /*autoclose starttags and endtags*/ &gBodyAutoClose,0,0,0, - /*parent,incl,exclgroups*/ kFlowEntity, (kInlineEntity|kSelf), kNone, // Treat userdefined as inline element - Ref bug 56245,66772 - /*special props, prop-range*/ kNone, kBodyPropRange, - /*special parents,kids,skip*/ &gInNoframes,&gBodyKids,eHTMLTag_unknown); - }//if -} - -void DeleteElementTable(void) { - if(gHTMLElements) { - delete [] gHTMLElements; - gHTMLElements=0; - } -} - -/** - * This is called to answer the CanBeContained question when LI is the parent - * @update gess 10/13/00 - * @param - * @return - */ -PRBool CanBeContainedLI(eHTMLTags aChildTag,nsDTDContext &aContext) { - PRBool result=PR_TRUE; - - //walk the parent hierarchy, to see if the LI is in a block or an inline. 
- PRInt32 anIndex=aContext.GetCount(); - PRBool theLIHasABlockParent=PR_FALSE; - - PRBool theChildIsBlock=PR_FALSE; - - if((aChildTag>eHTMLTag_unknown) && (aChildTag<eHTMLTag_userdefined)) { - theChildIsBlock= (eHTMLTag_dt==aChildTag) || - (eHTMLTag_dd==aChildTag) || - (gHTMLElements[aChildTag].IsMemberOf(kBlock)) || - (gHTMLElements[aChildTag].IsMemberOf(kBlockEntity)) || - (gHTMLElements[aChildTag].IsMemberOf(kHeading)) || - (gHTMLElements[aChildTag].IsMemberOf(kPreformatted))|| - (gHTMLElements[aChildTag].IsMemberOf(kList)); - } - - if(theChildIsBlock) { - - while(--anIndex>0) { - eHTMLTags aParent=aContext.TagAt(anIndex); - if((eHTMLTag_ul==aParent) || - (eHTMLTag_ol==aParent) || - (eHTMLTag_table==aParent) || - (eHTMLTag_dir==aParent)) { - theLIHasABlockParent=PR_TRUE; - break; - } - } - result=theLIHasABlockParent; - } - else { - result=PR_TRUE; - } - return result; -} - -/** - * This is the default implementation, that was moved out of CNavDTD, so that it can be made - * to behave in a more flexible manner. At this point, the code itself has not changed. - * NOTE: This is not called YET. It's just a placeholder for future changes. - * - * @update gess 10/13/00 - * @param - * @return - */ -PRBool nsHTMLElement::CanBeContained(eHTMLTags aChildTag,nsDTDContext &aContext) { - PRBool result=PR_TRUE; - if(!mCanBeContained) { - - /* # Interesting test cases: Result: - * 1. <UL><LI>..<B>..<LI> inner <LI> closes outer <LI> - * 2. <CENTER><DL><DT><A><CENTER> allow nested <CENTER> - * 3. <TABLE><TR><TD><TABLE>... allow nested <TABLE> - * 4. <FRAMESET> ... <FRAMESET> - */ - - //Note: This method is going away. First we need to get the elementtable to do closures right, and - // therefore we must get residual style handling to work. - - //the changes to this method were added to fix bug 54651... 
- - PRInt32 theCount=aContext.GetCount(); - result=PR_TRUE; - - if(0<theCount){ - const TagList* theRootTags=gHTMLElements[aChildTag].GetRootTags(); - const TagList* theSpecialParents=gHTMLElements[aChildTag].GetSpecialParents(); - if(theRootTags) { - PRInt32 theRootIndex=LastOf(aContext,*theRootTags); - PRInt32 theSPIndex=(theSpecialParents) ? LastOf(aContext,*theSpecialParents) : kNotFound; - PRInt32 theChildIndex=GetIndexOfChildOrSynonym(aContext,aChildTag); - PRInt32 theTargetIndex=(theRootIndex>theSPIndex) ? theRootIndex : theSPIndex; - - if((theTargetIndex==theCount-1) || - ((theTargetIndex==theChildIndex) && gHTMLElements[aChildTag].CanContainSelf())) { - result=PR_TRUE; - } - else { - - result=PR_FALSE; - - static eHTMLTags gTableElements[]={eHTMLTag_td,eHTMLTag_th}; - - PRInt32 theIndex=theCount-1; - while(theChildIndex<theIndex) { - eHTMLTags theParentTag=aContext.TagAt(theIndex--); - if (gHTMLElements[theParentTag].IsMemberOf(kBlockEntity) || - gHTMLElements[theParentTag].IsMemberOf(kHeading) || - gHTMLElements[theParentTag].IsMemberOf(kPreformatted) || - gHTMLElements[theParentTag].IsMemberOf(kFormControl) || //added this to fix bug 44479 - gHTMLElements[theParentTag].IsMemberOf(kList)) { - if(!HasOptionalEndTag(theParentTag)) { - result=PR_TRUE; - break; - } - } - else if(FindTagInSet(theParentTag,gTableElements,sizeof(gTableElements)/sizeof(eHTMLTag_unknown))){ - result=PR_TRUE; //added this to catch a case we missed; bug 57173. - break; - } - } - } - } - } - - } - else result=(*mCanBeContained)(aChildTag,aContext); - return result; -} - -/** - * Call this to find the index of a given child, or (if not found) - * the index of its nearest synonym. 
- * - * @update gess 3/25/98 - * @param aTagStack -- list of open tags - * @param aTag -- tag to test for containership - * @return index of kNotFound - */ -PRInt32 nsHTMLElement::GetIndexOfChildOrSynonym(nsDTDContext& aContext,eHTMLTags aChildTag) { - PRInt32 theChildIndex=aContext.LastOf(aChildTag); - if(kNotFound==theChildIndex) { - const TagList* theSynTags=gHTMLElements[aChildTag].GetSynonymousTags(); //get the list of tags that THIS tag can close - if(theSynTags) { - theChildIndex=LastOf(aContext,*theSynTags); - } - else{ - PRInt32 theGroup=nsHTMLElement::GetSynonymousGroups(aChildTag); - if(theGroup) { - theChildIndex=aContext.GetCount(); - while(-1<--theChildIndex) { - eHTMLTags theTag=aContext[theChildIndex]; - if(gHTMLElements[theTag].IsMemberOf(theGroup)) { - break; - } - } - } - } - } - return theChildIndex; -} - -int nsHTMLElement::GetSynonymousGroups(eHTMLTags aTag) { - int result=0; - - int theGroup=gHTMLElements[aTag].mParentBits; - switch(theGroup) { - - case kPhrase: - case kSpecial: - case kFontStyle: - case kHTMLContent: - case kHeadContent: - case kHeadMisc: - case kFormControl: - case kPreformatted: - case kHeading: - case kBlockMisc: - case kBlock: - case kList: - case kPCDATA: - case kExtensions: - case kTable: - case kSelf: - case kInlineEntity: - case kBlockEntity: - case kFlowEntity: - case kAllTags: - default: - break; - } - - if(eHTMLTag_font==aTag) //hack for backward compatibility - result&=kFontStyle; - - return result; -} - -/** - * - * @update gess1/21/99 - * @param - * @return - */ -PRBool nsHTMLElement::HasSpecialProperty(PRInt32 aProperty) const{ - PRBool result=TestBits(mSpecialProperties,aProperty); - return result; -} - -/** - * - * @update gess12/13/98 - * @param - * @return - */ -PRBool nsHTMLElement::IsContainer(eHTMLTags aChild) { - PRBool result=(eHTMLTag_unknown==aChild); - - if(!result){ - result=!TestBits(gHTMLElements[aChild].mSpecialProperties,kNonContainer); - } - return result; -} - -/** - * This tests whether all 
the bits in the parentbits - * are included in the given set. It may be too - * broad a question for most cases. - * - * @update gess12/13/98 - * @param - * @return - */ -PRBool nsHTMLElement::IsMemberOf(PRInt32 aSet) const{ - return TestBits(aSet,mParentBits); -} - -/** - * This tests whether all the bits in the parentbits - * are included in the given set. It may be too - * broad a question for most cases. - * - * @update gess12/13/98 - * @param - * @return - */ -PRBool nsHTMLElement::ContainsSet(PRInt32 aSet) const{ - return TestBits(mParentBits,aSet); -} - -/** - * This method determines whether the given tag closes other blocks. - * - * @update gess 12/20/99 -- added H1..H6 to this list. - * @param - * @return - */ -PRBool nsHTMLElement::IsBlockCloser(eHTMLTags aTag){ - PRBool result=PR_FALSE; - - if((aTag>=eHTMLTag_unknown) & (aTag<=eHTMLTag_xmp)){ - - result=(gHTMLElements[aTag].IsBlock() || - gHTMLElements[aTag].IsBlockEntity() || - (kHeading==gHTMLElements[aTag].mParentBits)); - if(!result) { - // NOBR is a block closure - Ref. Bug# 24462 - // DIR is a block closure - Ref. Bug# 25845 - // TD is a block closure - Ref. Bug# 27490 - // TR is a block closure - Ref. Bug# 26488 - // OBJECT is a block closure - Ref. 
Bug# 88992 - - static eHTMLTags gClosers[]={ eHTMLTag_table,eHTMLTag_tbody, - eHTMLTag_td,eHTMLTag_th, - eHTMLTag_tr,eHTMLTag_caption, - eHTMLTag_object,eHTMLTag_applet, - eHTMLTag_ol, eHTMLTag_ul, - eHTMLTag_optgroup, - eHTMLTag_nobr,eHTMLTag_dir}; - - result=FindTagInSet(aTag,gClosers,sizeof(gClosers)/sizeof(eHTMLTag_body)); - } - } - return result; -} - - -/** - * - * @update gess 01/04/99 - * @param - * @return - */ -PRBool nsHTMLElement::IsInlineEntity(eHTMLTags aTag){ - PRBool result=PR_FALSE; - if((aTag>=eHTMLTag_unknown) & (aTag<=eHTMLTag_xmp)){ - result=TestBits(gHTMLElements[aTag].mParentBits,kInlineEntity); - } - return result; -} - -/** - * - * @update gess 01/04/99 - * @param - * @return - */ -PRBool nsHTMLElement::IsFlowEntity(eHTMLTags aTag){ - PRBool result=PR_FALSE; - - if((aTag>=eHTMLTag_unknown) & (aTag<=eHTMLTag_xmp)){ - result=TestBits(gHTMLElements[aTag].mParentBits,kFlowEntity); - } - return result; -} - -/** - * - * @update gess 01/04/99 - * @param - * @return - */ -PRBool nsHTMLElement::IsBlockParent(eHTMLTags aTag){ - PRBool result=PR_FALSE; - if((aTag>=eHTMLTag_unknown) & (aTag<=eHTMLTag_xmp)){ - result=TestBits(gHTMLElements[aTag].mInclusionBits,kBlockEntity); - } - return result; -} - -/** - * - * @update gess 01/04/99 - * @param - * @return - */ -PRBool nsHTMLElement::IsInlineParent(eHTMLTags aTag){ - PRBool result=PR_FALSE; - if((aTag>=eHTMLTag_unknown) & (aTag<=eHTMLTag_xmp)){ - result=TestBits(gHTMLElements[aTag].mInclusionBits,kInlineEntity); - } - return result; -} - - -/** - * - * @update gess 01/04/99 - * @param - * @return - */ -PRBool nsHTMLElement::IsFlowParent(eHTMLTags aTag){ - PRBool result=PR_FALSE; - if((aTag>=eHTMLTag_unknown) & (aTag<=eHTMLTag_xmp)){ - result=TestBits(gHTMLElements[aTag].mInclusionBits,kFlowEntity); - } - return result; -} - -/** - * - * @update harishd 11/19/99 - * @param - * @return - */ -PRBool nsHTMLElement::IsSpecialParent(eHTMLTags aTag) const{ - PRBool result=PR_FALSE; - if(mSpecialParents) { - 
if(FindTagInSet(aTag,mSpecialParents->mTags,mSpecialParents->mCount)) - result=PR_TRUE; - } - return result; -} - -/** - * Tells us whether the given tag opens a section - * @update gess 01/04/99 - * @param id of tag - * @return TRUE if opens section - */ -PRBool nsHTMLElement::IsSectionTag(eHTMLTags aTag){ - PRBool result=PR_FALSE; - switch(aTag){ - case eHTMLTag_html: - case eHTMLTag_frameset: - case eHTMLTag_body: - case eHTMLTag_head: - result=PR_TRUE; - break; - default: - result=PR_FALSE; - } - return result; -} - - -/** - * - * @update gess 01/04/99 - * @param - * @return - */ -PRBool nsHTMLElement::CanContain(eHTMLTags aParent,eHTMLTags aChild,nsDTDMode aMode){ - PRBool result=PR_FALSE; - if((aParent>=eHTMLTag_unknown) && (aParent<=eHTMLTag_userdefined)){ - result=gHTMLElements[aParent].CanContain(aChild,aMode); - } - return result; -} - -/** - * - * @update gess 01/04/99 - * @param - * @return - */ -PRBool nsHTMLElement::CanExclude(eHTMLTags aChild) const{ - PRBool result=PR_FALSE; - - if(gHTMLElements[aChild].HasSpecialProperty(kLegalOpen)) { - // Some tags could be opened anywhere, in the document, as they please. - return PR_FALSE; - } - - //Note that special kids takes precedence over exclusions... - if(mSpecialKids) { - if(FindTagInSet(aChild,mSpecialKids->mTags,mSpecialKids->mCount)) { - return PR_FALSE; - } - } - - if(mExclusionBits){ - if(gHTMLElements[aChild].IsMemberOf(mExclusionBits)) { - result=PR_TRUE; - } - } - return result; -} - -/** - * - * @update harishd 03/01/00 - * @param - * @return - */ -PRBool nsHTMLElement::IsExcludableParent(eHTMLTags aParent) const{ - PRBool result=PR_FALSE; - - if(!IsTextTag(mTagID)) { - if(mExcludableParents) { - const TagList* theParents=mExcludableParents; - if(FindTagInSet(aParent,theParents->mTags,theParents->mCount)) - result=PR_TRUE; - } - if(!result) { - // If you're a block parent make sure that you're not the - // parent of a TABLE element. ex. <table><tr><td><div><td></tr></table> - // IE & Nav. 
render this as table with two cells ( which I think is correct ). - // NOTE: If need arise we could use the root node to solve this problem - if(nsHTMLElement::IsBlockParent(aParent)){ - switch(mTagID) { - case eHTMLTag_caption: - case eHTMLTag_thead: - case eHTMLTag_tbody: - case eHTMLTag_tfoot: - case eHTMLTag_td: - case eHTMLTag_th: - case eHTMLTag_tr: - result=PR_TRUE; - default: - break; - } - } - } - } - return result; -} - -/** - * - * @update gess 01/04/99 - * @param - * @return - */ -PRBool nsHTMLElement::CanOmitEndTag(void) const{ - PRBool result=!IsContainer(mTagID); - if(!result) - result=TestBits(mSpecialProperties,kOmitEndTag); - return result; -} - -/** - * - * @update gess 01/04/99 - * @param - * @return - */ -PRBool nsHTMLElement::CanOmitStartTag(eHTMLTags aChild) const{ - PRBool result=PR_FALSE; - return result; -} - -/** - * - * @update gess12/13/98 - * @param - * @return - */ -PRBool nsHTMLElement::IsChildOfHead(eHTMLTags aChild,PRBool& aExclusively) { -#if 0 - PRBool result=PR_FALSE; - - aExclusively=PR_FALSE; - - switch(aChild) { - - case eHTMLTag_base: - case eHTMLTag_link: - case eHTMLTag_meta: - case eHTMLTag_title: - case eHTMLTag_style: - aExclusively=result=PR_TRUE; - break; - - case eHTMLTag_bgsound: - case eHTMLTag_script: - case eHTMLTag_noembed: - case eHTMLTag_noscript: - case eHTMLTag_whitespace: - case eHTMLTag_newline: - case eHTMLTag_comment: - result=PR_TRUE; - break; - - default: - break; - } - return result; -#else - aExclusively=PR_TRUE; - return FindTagInSet(aChild,gHeadKids.mTags,gHeadKids.mCount); -#endif -} - - - -/** - * - * @update gess12/13/98 - * @param - * @return - */ -PRBool nsHTMLElement::SectionContains(eHTMLTags aChild,PRBool allowDepthSearch) { - PRBool result=PR_FALSE; - const TagList* theRootTags=gHTMLElements[aChild].GetRootTags(); - - if(theRootTags){ - if(!FindTagInSet(mTagID,theRootTags->mTags,theRootTags->mCount)){ - eHTMLTags theRootBase=theRootTags->mTags[0]; - if((eHTMLTag_unknown!=theRootBase) && 
(allowDepthSearch)) - result=SectionContains(theRootBase,allowDepthSearch); - } - else result=PR_TRUE; - } - return result; -} - -/** - * This method should be called to determine if the a tags - * hierarchy needs to be validated. - * - * @update harishd 04/19/00 - * @param - * @return - */ - -PRBool nsHTMLElement::ShouldVerifyHierarchy() { - PRBool result=PR_FALSE; - - // If the tag cannot contain itself then we need to make sure that - // anywhere in the hierarchy we don't nest accidently. - // Ex: <H1><LI><H1><LI>. Inner LI has the potential of getting nested - // inside outer LI.If the tag can contain self, Ex: <A><B><A>, - // ( B can contain self )then ask the child (<A>) if it requires a containment check. - if(mTagID!=eHTMLTag_userdefined) { - result=HasSpecialProperty(kVerifyHierarchy); - } - return result; -} - -/** - * - * @update gess12/13/98 - * @param - * @return - */ -PRBool nsHTMLElement::IsResidualStyleTag(eHTMLTags aChild) { - PRBool result=PR_FALSE; - switch(aChild) { - case eHTMLTag_a: - case eHTMLTag_b: - case eHTMLTag_bdo: - case eHTMLTag_big: - case eHTMLTag_blink: - case eHTMLTag_del: - case eHTMLTag_em: - case eHTMLTag_font: - case eHTMLTag_i: - case eHTMLTag_ins: - case eHTMLTag_q: - case eHTMLTag_s: - case eHTMLTag_small: - case eHTMLTag_strong: - case eHTMLTag_strike: - case eHTMLTag_sub: - case eHTMLTag_sup: - case eHTMLTag_tt: - case eHTMLTag_u: - result=PR_TRUE; - break; - - case eHTMLTag_abbr: - case eHTMLTag_acronym: - case eHTMLTag_center: - case eHTMLTag_cite: - case eHTMLTag_code: - case eHTMLTag_dfn: - case eHTMLTag_kbd: - case eHTMLTag_samp: - case eHTMLTag_span: - case eHTMLTag_var: - result=PR_FALSE; - default: - break; - }; - return result; -} - -/** - * - * @update gess12/13/98 - * @param - * @return - */ -PRBool nsHTMLElement::CanContainType(PRInt32 aType) const{ - PRInt32 answer=mInclusionBits & aType; - PRBool result=PRBool(0!=answer); - return result; -} - -/** - * - * @update gess12/13/98 - * @param - * @return - */ 
-PRBool nsHTMLElement::IsWhitespaceTag(eHTMLTags aChild) { - PRBool result=PR_FALSE; - - switch(aChild) { - case eHTMLTag_newline: - case eHTMLTag_whitespace: - result=PR_TRUE; - break; - default: - break; - } - return result; -} - -/** - * - * @update gess12/13/98 - * @param - * @return - */ -PRBool nsHTMLElement::IsTextTag(eHTMLTags aChild) { - PRBool result=PR_FALSE; - - switch(aChild) { - case eHTMLTag_text: - case eHTMLTag_entity: - case eHTMLTag_newline: - case eHTMLTag_whitespace: - result=PR_TRUE; - break; - default: - break; - } - return result; -} - -/** - * - * @update gess12/13/98 - * @param - * @return - */ -PRBool nsHTMLElement::CanContainSelf(void) const { - PRBool result=PRBool(TestBits(mInclusionBits,kSelf)!=0); - return result; -} - -/** - * This method is called to determine (once and for all) whether a start tag - * can close another tag on the stack. This method will return - * false if something prevents aParentTag from closing. - * - * @update gess 12/20/99 - * @param aContext is the tag stack we're testing against - * @param aChildTag is the child we're trying to close - * @param aCount is the number tags we should test - * @return TRUE if we can autoclose the start tag; FALSE otherwise - */ -PRBool nsHTMLElement::CanAutoCloseTag(nsDTDContext& aContext,eHTMLTags aChildTag) const{ - - PRInt32 thePos=aContext.GetCount(); - PRBool result=PR_FALSE; - eHTMLTags thePrevTag=eHTMLTag_unknown; - - for(thePos=aContext.GetCount()-1;thePos>0;thePos--) { - thePrevTag=aContext.TagAt(thePos); - switch(thePrevTag) { - case eHTMLTag_applet: - case eHTMLTag_td: - thePos=0; - result=PR_FALSE; - break; - case eHTMLTag_body: - result=aChildTag!=thePrevTag; - thePos=0; - default: - if(aChildTag==thePrevTag) { - result=PR_TRUE; - thePos=0; - } - break; - } //switch - } //for - - return result; -} - -/** - * - * @update gess 10.17.2000 - * @param - * @return - */ -eHTMLTags nsHTMLElement::GetCloseTargetForEndTag(nsDTDContext& aContext,PRInt32 anIndex,nsDTDMode 
aMode) const{ - eHTMLTags result=eHTMLTag_unknown; - - int theCount=aContext.GetCount(); - int theIndex=theCount; - - if(IsMemberOf(kPhrase)){ - - while((--theIndex>=anIndex) && (eHTMLTag_unknown==result)){ - eHTMLTags theTag=aContext.TagAt(theIndex); - if(theTag!=mTagID) { - - //fixes a derivative of bug 22842... - if(CanContainType(kBlock)) { //INS/DEL can contain blocks. - if(gHTMLElements[eHTMLTags(theTag)].IsMemberOf(kBlockEntity) || gHTMLElements[eHTMLTags(theTag)].IsMemberOf(kFlowEntity)) { - if(HasOptionalEndTag(theTag)) { - continue; //then I can close it. - } - } - } - - //phrasal elements can close other phrasals, along with fontstyle and special tags... - if(!gHTMLElements[theTag].IsMemberOf(kSpecial|kFontStyle|kPhrase)) { //fix bug 56665 - break; //it's not something I can close - } - } - else { - result=theTag; //stop because you just found yourself on the stack - break; - } - } - } - - else if(IsMemberOf(kSpecial)){ - - while((--theIndex>=anIndex) && (eHTMLTag_unknown==result)){ - eHTMLTags theTag=aContext.TagAt(theIndex); - if(theTag!=mTagID) { - //phrasal elements can close other phrasals, along with fontstyle and special tags... - - if((eHTMLTag_userdefined==theTag) || - gHTMLElements[theTag].IsSpecialEntity() || - gHTMLElements[theTag].IsFontStyleEntity()|| - gHTMLElements[theTag].IsPhraseEntity()) { // Added Phrasel to fix bug 26347 - continue; - } - else { - - //fixes bug 22842... - if(CanContainType(kBlock)) { - if(gHTMLElements[eHTMLTags(theTag)].IsMemberOf(kBlockEntity) || gHTMLElements[eHTMLTags(theTag)].IsMemberOf(kFlowEntity)) { - if(HasOptionalEndTag(theTag)) { - continue; //then I can close it. - } - } - } - break; //it's not something I can close - } - } - else { - result=theTag; //stop because you just found yourself on the stack - break; - } - } - } - - else if(ContainsSet(kPreformatted) || - IsMemberOf(kFormControl|kExtensions|kPreformatted)){ //bug54834... 
- - while((--theIndex>=anIndex) && (eHTMLTag_unknown==result)){ - eHTMLTags theTag=aContext.TagAt(theIndex); - if(theTag!=mTagID) { - if(!CanContain(theTag,aMode)) { - break; //it's not something I can close - } - } - else { - result=theTag; //stop because you just found yourself on the stack - break; - } - } - } - - else if(IsMemberOf(kList)){ - - while((--theIndex>=anIndex) && (eHTMLTag_unknown==result)){ - eHTMLTags theTag=aContext.TagAt(theIndex); - if(theTag!=mTagID) { - if(!CanContain(theTag,aMode)) { - break; //it's not something I can close - } - } - else { - result=theTag; //stop because you just found yourself on the stack - break; - } - } - } - - else if(IsResidualStyleTag(mTagID)){ - - // Before finding a close target, for the current tag, make sure - // that the tag above does not gate. - // Note: we intentionally make 2 passes: - // The first pass tries to exactly match, the 2nd pass matches the group. - - const TagList* theRootTags=gHTMLElements[mTagID].GetEndRootTags(); - PRInt32 theIndexCopy=theIndex; - while(--theIndex>=anIndex){ - eHTMLTags theTag=aContext.TagAt(theIndex); - if(theTag == mTagID) { - return theTag; // we found our target. - } - else if (!CanContain(theTag,aMode) || - (theRootTags && FindTagInSet(theTag,theRootTags->mTags,theRootTags->mCount))) { - // If you cannot contain this tag then - // you cannot close it either. It looks like - // the tag trying to close is misplaced. - // In the following Exs. notice the misplaced /font: - // Ex. <font><table><tr><td></font></td></tr></table. -- Ref. bug 56245 - // Ex. <font><select><option></font></select> -- Ref. bug 37618 - // Ex. <font><select></font><option></select> -- Ref. bug 98187 - return eHTMLTag_unknown; - } - } - - theIndex=theIndexCopy; - while(--theIndex>=anIndex){ - eHTMLTags theTag=aContext.TagAt(theIndex); - if(gHTMLElements[theTag].IsMemberOf(mParentBits)) { - return theTag; - } - } - } - - else if(gHTMLElements[mTagID].IsTableElement()) { - - //This fixes 57378... 
- //example: <TABLE><THEAD><TR><TH></THEAD> which didn't close the <THEAD> - - PRInt32 theLastTable=aContext.LastOf(eHTMLTag_table); - PRInt32 theLastOfMe=aContext.LastOf(mTagID); - if(theLastTable<theLastOfMe) { - return mTagID; - } - - } - - return result; -} - - -/** - * See whether this tag can DIRECTLY contain the given child. - * @update gess12/13/98 - * @param - * @return - */ -PRBool nsHTMLElement::CanContain(eHTMLTags aChild,nsDTDMode aMode) const{ - - - if(IsContainer(mTagID)){ - - if(gHTMLElements[aChild].HasSpecialProperty(kLegalOpen)) { - // Some tags could be opened anywhere, in the document, as they please. - return PR_TRUE; - } - - if(mTagID==aChild) { - return CanContainSelf(); //not many tags can contain themselves... - } - - const TagList* theCloseTags=gHTMLElements[aChild].GetAutoCloseStartTags(); - if(theCloseTags){ - if(FindTagInSet(mTagID,theCloseTags->mTags,theCloseTags->mCount)) - return PR_FALSE; - } - - if(gHTMLElements[aChild].mExcludableParents) { - const TagList* theParents=gHTMLElements[aChild].mExcludableParents; - if(FindTagInSet(mTagID,theParents->mTags,theParents->mCount)) - return PR_FALSE; - } - - if(gHTMLElements[aChild].IsExcludableParent(mTagID)) - return PR_FALSE; - - if(gHTMLElements[aChild].IsBlockCloser(aChild)){ - if(nsHTMLElement::IsBlockParent(mTagID)){ - return PR_TRUE; - } - } - - if(nsHTMLElement::IsInlineEntity(aChild)){ - if(nsHTMLElement::IsInlineParent(mTagID)){ - return PR_TRUE; - } - } - - if(nsHTMLElement::IsFlowEntity(aChild)) { - if(nsHTMLElement::IsFlowParent(mTagID)){ - return PR_TRUE; - } - } - - if(nsHTMLElement::IsTextTag(aChild)) { - if(nsHTMLElement::IsInlineParent(mTagID)){ - return PR_TRUE; - } - } - - if(CanContainType(gHTMLElements[aChild].mParentBits)) { - return PR_TRUE; - } - - if(mSpecialKids) { - if(FindTagInSet(aChild,mSpecialKids->mTags,mSpecialKids->mCount)) { - return PR_TRUE; - } - } - - // Allow <p> to contain <table> only in Quirks mode, bug 43678 and bug 91927 - if (aChild == 
eHTMLTag_table && mTagID == eHTMLTag_p && aMode == eDTDMode_quirks) { - return PR_TRUE; - } - } - - return PR_FALSE; -} - -#ifdef DEBUG -void nsHTMLElement::DebugDumpContainment(const char* aFilename,const char* aTitle){ -} - -void nsHTMLElement::DebugDumpMembership(const char* aFilename){ -} - -void nsHTMLElement::DebugDumpContainType(const char* aFilename){ -} -#endif diff --git a/htmlparser/src/nsElementTable.h b/htmlparser/src/nsElementTable.h deleted file mode 100644 index 4f637c9f8cec..000000000000 --- a/htmlparser/src/nsElementTable.h +++ /dev/null @@ -1,277 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - - -/** - * MODULE NOTES: - * @update gess 4/1/98 - * - */ - - - -#ifndef _NSELEMENTABLE -#define _NSELEMENTABLE - -#include "nsHTMLTokens.h" -#include "nsDTDUtils.h" - - -//********************************************************************************************* -// The following ints define the standard groups of HTML elements... -//********************************************************************************************* - -static const int kNone= 0x0; - -static const int kHTMLContent = 0x0001; // HEAD, (FRAMESET | BODY) -static const int kHeadContent = 0x0002; // TITLE, ISINDEX, BASE -static const int kHeadMisc = 0x0004; // SCRIPT, STYLE, META, LINK, OBJECT - -static const int kSpecial = 0x0008; // A, IMG, APPLET, OBJECT, FONT, BASEFONT, BR, SCRIPT, - // MAP, Q, SUB, SUP, SPAN, BDO, IFRAME - -static const int kFormControl = 0x0010; // INPUT SELECT TEXTAREA LABEL BUTTON -static const int kPreformatted = 0x0020; // PRE -static const int kPreExclusion = 0x0040; // IMG, OBJECT, APPLET, BIG, SMALL, SUB, SUP, FONT, BASEFONT -static const int kFontStyle = 0x0080; // TT, I, B, U, S, STRIKE, BIG, SMALL, BLINK -static const int kPhrase = 0x0100; // EM, STRONG, DFN, CODE, SAMP, KBD, VAR, CITE, ABBR, ACRONYM -static const int kHeading = 0x0200; // H1..H6 -static const int kBlockMisc = 0x0400; // OBJECT, SCRIPT -static const int kBlock = 0x0800; // ADDRESS, BLOCKQUOTE, CENTER, DIV, DL, FIELDSET, FORM, - // ISINDEX, HR, NOSCRIPT, 
NOFRAMES, P, TABLE -static const int kList = 0x1000; // UL, OL, DIR, MENU -static const int kPCDATA = 0x2000; // plain text and entities... -static const int kSelf = 0x4000; // whatever THIS tag is... -static const int kExtensions = 0x8000; // BGSOUND, WBR, NOBR -static const int kTable = 0x10000;// TR,TD,THEAD,TBODY,TFOOT,CAPTION,TH -static const int kDLChild = 0x20000;// DL, DT -static const int kCDATA = 0x40000;// just plain text... - -static const int kInlineEntity = (kPCDATA|kFontStyle|kPhrase|kSpecial|kFormControl|kExtensions); // #PCDATA, %fontstyle, %phrase, %special, %formctrl -static const int kBlockEntity = (kHeading|kList|kPreformatted|kBlock); // %heading, %list, %preformatted, %block -static const int kFlowEntity = (kBlockEntity|kInlineEntity); // %blockentity, %inlineentity -static const int kAllTags = 0xffffff; - - -//********************************************************************************************* -// The following ints define the standard groups of HTML elements... -//********************************************************************************************* - - -extern void InitializeElementTable(void); -extern void DeleteElementTable(void); - -typedef PRBool (*ContainFunc)(eHTMLTags aTag,nsDTDContext &aContext); - - -/** - * We're asking the question: is aTest a member of bitset. 
- * - * @update gess 01/04/99 - * @param - * @return TRUE or FALSE - */ -inline PRBool TestBits(int aBitset,int aTest) { - if(aTest) { - PRInt32 result=(aBitset & aTest); - return PRBool(result==aTest); - } - return PR_FALSE; -} - - -/** - * - * @update gess 01/04/99 - * @param - * @return - */ -struct nsHTMLElement { - -#ifdef DEBUG - static void DebugDumpMembership(const char* aFilename); - static void DebugDumpContainment(const char* aFilename,const char* aTitle); - static void DebugDumpContainType(const char* aFilename); -#endif - - static PRBool IsInlineEntity(eHTMLTags aTag); - static PRBool IsFlowEntity(eHTMLTags aTag); - static PRBool IsBlockCloser(eHTMLTags aTag); - - inline PRBool IsBlock(void) { - if((mTagID>=eHTMLTag_unknown) & (mTagID<=eHTMLTag_xmp)){ - return TestBits(mParentBits,kBlock); - } - return PR_FALSE; - } - - inline PRBool IsBlockEntity(void) { - if((mTagID>=eHTMLTag_unknown) & (mTagID<=eHTMLTag_xmp)){ - return TestBits(mParentBits,kBlockEntity); - } - return PR_FALSE; - } - - inline PRBool IsSpecialEntity(void) { - if((mTagID>=eHTMLTag_unknown) & (mTagID<=eHTMLTag_xmp)){ - return TestBits(mParentBits,kSpecial); - } - return PR_FALSE; - } - - inline PRBool IsPhraseEntity(void) { - if((mTagID>=eHTMLTag_unknown) & (mTagID<=eHTMLTag_xmp)){ - return TestBits(mParentBits,kPhrase); - } - return PR_FALSE; - } - - inline PRBool IsFontStyleEntity(void) { - if((mTagID>=eHTMLTag_unknown) & (mTagID<=eHTMLTag_xmp)){ - return TestBits(mParentBits,kFontStyle); - } - return PR_FALSE; - } - - inline PRBool IsTableElement(void) { //return yes if it's a table or child of a table... 
- PRBool result=PR_FALSE; - - switch(mTagID) { - case eHTMLTag_table: - case eHTMLTag_thead: - case eHTMLTag_tbody: - case eHTMLTag_tfoot: - case eHTMLTag_caption: - case eHTMLTag_tr: - case eHTMLTag_td: - case eHTMLTag_th: - case eHTMLTag_col: - case eHTMLTag_colgroup: - result=PR_TRUE; - break; - default: - result=PR_FALSE; - } - return result; - } - - - static int GetSynonymousGroups(eHTMLTags aTag); - - static PRInt32 GetIndexOfChildOrSynonym(nsDTDContext& aContext,eHTMLTags aChildTag); - - const TagList* GetSynonymousTags(void) const {return mSynonymousTags;} - const TagList* GetRootTags(void) const {return mRootNodes;} - const TagList* GetEndRootTags(void) const {return mEndRootNodes;} - const TagList* GetAutoCloseStartTags(void) const {return mAutocloseStart;} - const TagList* GetAutoCloseEndTags(void) const {return mAutocloseEnd;} - eHTMLTags GetCloseTargetForEndTag(nsDTDContext& aContext,PRInt32 anIndex,nsDTDMode aMode) const; - - const TagList* GetSpecialChildren(void) const {return mSpecialKids;} - const TagList* GetSpecialParents(void) const {return mSpecialParents;} - - PRBool IsMemberOf(PRInt32 aType) const; - PRBool ContainsSet(PRInt32 aType) const; - PRBool CanContainType(PRInt32 aType) const; - - eHTMLTags GetTag(void) const {return mTagID;} - PRBool CanContain(eHTMLTags aChild,nsDTDMode aMode) const; - PRBool CanExclude(eHTMLTags aChild) const; - PRBool CanOmitStartTag(eHTMLTags aChild) const; - PRBool CanOmitEndTag(void) const; - PRBool CanContainSelf(void) const; - PRBool CanAutoCloseTag(nsDTDContext& aContext,eHTMLTags aTag) const; - PRBool HasSpecialProperty(PRInt32 aProperty) const; - PRBool IsSpecialParent(eHTMLTags aTag) const; - PRBool IsExcludableParent(eHTMLTags aParent) const; - PRBool SectionContains(eHTMLTags aTag,PRBool allowDepthSearch); - PRBool ShouldVerifyHierarchy(); - - PRBool CanBeContained(eHTMLTags aParentTag,nsDTDContext &aContext); //default version - - static PRBool CanContain(eHTMLTags aParent,eHTMLTags aChild,nsDTDMode 
aMode); - static PRBool IsContainer(eHTMLTags aTag) ; - static PRBool IsResidualStyleTag(eHTMLTags aTag) ; - static PRBool IsTextTag(eHTMLTags aTag); - static PRBool IsWhitespaceTag(eHTMLTags aTag); - - static PRBool IsBlockParent(eHTMLTags aTag); - static PRBool IsInlineParent(eHTMLTags aTag); - static PRBool IsFlowParent(eHTMLTags aTag); - static PRBool IsSectionTag(eHTMLTags aTag); - static PRBool IsChildOfHead(eHTMLTags aTag,PRBool& aExclusively) ; - - eHTMLTags mTagID; - eHTMLTags mRequiredAncestor; - eHTMLTags mExcludingAncestor; //If set, the presence of the excl-ancestor prevents this from opening. - const TagList* mRootNodes; //These are the tags above which you many not autoclose a START tag - const TagList* mEndRootNodes; //These are the tags above which you many not autoclose an END tag - const TagList* mAutocloseStart; //these are the start tags that you can automatically close with this START tag - const TagList* mAutocloseEnd; //these are the start tags that you can automatically close with this END tag - const TagList* mSynonymousTags; //These are morally equivalent; an end tag for one can close a start tag for another (like <Hn>) - const TagList* mExcludableParents; //These are the TAGS that cannot contain you - int mParentBits; //defines groups that can contain this element - int mInclusionBits; //defines parental and containment rules - int mExclusionBits; //defines things you CANNOT contain - int mSpecialProperties; //used for various special purposes... - PRUint32 mPropagateRange; //tells us how far a parent is willing to prop. badly formed children - const TagList* mSpecialParents; //These are the special tags that contain this tag (directly) - const TagList* mSpecialKids; //These are the extra things you can contain - eHTMLTags mSkipTarget; //If set, then we skip all content until this tag is seen - ContainFunc mCanBeContained; -}; - -extern nsHTMLElement* gHTMLElements; - -//special property bits... 
-static const int kDiscardTag = 0x0001; //tells us to toss this tag -static const int kOmitEndTag = 0x0002; //safely ignore end tag -static const int kLegalOpen = 0x0004; //Lets BODY, TITLE, SCRIPT to reopen -static const int kNoPropagate = 0x0008; //If set, this tag won't propagate as a child -static const int kBadContentWatch = 0x0010; - -static const int kNoStyleLeaksIn = 0x0020; -static const int kNoStyleLeaksOut = 0x0040; - -static const int kMustCloseSelf = 0x0080; -static const int kSaveMisplaced = 0x0100; //If set, then children this tag can't contain are pushed onto the misplaced stack -static const int kNonContainer = 0x0200; //If set, then this tag is not a container. -static const int kHandleStrayTag = 0x0400; //If set, we automatically open a start tag -static const int kRequiresBody = 0x0800; //If set, then in case of no BODY one will be opened up immediately. -static const int kVerifyHierarchy = 0x1000; //If set, check to see if the tag is a child or a sibling.. - -#endif diff --git a/htmlparser/src/nsExpatDriver.cpp b/htmlparser/src/nsExpatDriver.cpp deleted file mode 100644 index 868bc8105181..000000000000 --- a/htmlparser/src/nsExpatDriver.cpp +++ /dev/null @@ -1,1218 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. 
- * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ -#include "nsExpatDriver.h" -#include "nsIParser.h" -#include "nsCOMPtr.h" -#include "nsParserCIID.h" -#include "CParserContext.h" -#include "nsIExpatSink.h" -#include "nsIContentSink.h" -#include "nsParserMsgUtils.h" -#include "nsIURL.h" -#include "nsIUnicharInputStream.h" -#include "nsNetUtil.h" -#include "prprf.h" -#include "prmem.h" -#include "nsTextFormatter.h" -#include "nsDirectoryServiceDefs.h" -#include "nsCRT.h" - -static const char kWhitespace[] = " \r\n\t"; // Optimized for typical cases - -/***************************** EXPAT CALL BACKS *******************************/ - - // The callback handlers that get called from the expat parser -PR_STATIC_CALLBACK(void) -Driver_HandleStartElement(void *aUserData, - const XML_Char *aName, - const XML_Char **aAtts) -{ - NS_ASSERTION(aUserData, "expat driver should exist"); - if (aUserData) { - NS_STATIC_CAST(nsExpatDriver*,aUserData)->HandleStartElement((const PRUnichar*)aName, - (const PRUnichar**)aAtts); - } -} - -PR_STATIC_CALLBACK(void) -Driver_HandleEndElement(void *aUserData, - const XML_Char *aName) -{ - NS_ASSERTION(aUserData, "expat driver should exist"); - if (aUserData) { - NS_STATIC_CAST(nsExpatDriver*,aUserData)->HandleEndElement((const PRUnichar*)aName); - } -} - -PR_STATIC_CALLBACK(void) -Driver_HandleCharacterData(void *aUserData, - const XML_Char *aData, - int aLength) -{ - NS_ASSERTION(aUserData, "expat driver should exist"); - if (aUserData) { - NS_STATIC_CAST(nsExpatDriver*,aUserData)->HandleCharacterData((PRUnichar*)aData, - PRUint32(aLength)); - } -} - -PR_STATIC_CALLBACK(void) -Driver_HandleComment(void *aUserData, - const XML_Char *aName) -{ - NS_ASSERTION(aUserData, "expat driver should exist"); - if(aUserData) { - NS_STATIC_CAST(nsExpatDriver*,aUserData)->HandleComment((const PRUnichar*)aName); - } -} - -PR_STATIC_CALLBACK(void) -Driver_HandleProcessingInstruction(void *aUserData, - const XML_Char *aTarget, - const XML_Char *aData) -{ - 
NS_ASSERTION(aUserData, "expat driver should exist"); - if (aUserData) { - NS_STATIC_CAST(nsExpatDriver*,aUserData)->HandleProcessingInstruction((const PRUnichar*)aTarget, - (const PRUnichar*)aData); - } -} - -PR_STATIC_CALLBACK(void) -Driver_HandleDefault(void *aUserData, - const XML_Char *aData, - int aLength) -{ - NS_ASSERTION(aUserData, "expat driver should exist"); - if (aUserData) { - NS_STATIC_CAST(nsExpatDriver*,aUserData)->HandleDefault((const PRUnichar*)aData, - PRUint32(aLength)); - } -} - -PR_STATIC_CALLBACK(void) -Driver_HandleStartCdataSection(void *aUserData) -{ - NS_ASSERTION(aUserData, "expat driver should exist"); - if (aUserData) { - NS_STATIC_CAST(nsExpatDriver*,aUserData)->HandleStartCdataSection(); - } -} - -PR_STATIC_CALLBACK(void) -Driver_HandleEndCdataSection(void *aUserData) -{ - NS_ASSERTION(aUserData, "expat driver should exist"); - if (aUserData) { - NS_STATIC_CAST(nsExpatDriver*,aUserData)->HandleEndCdataSection(); - } -} - -PR_STATIC_CALLBACK(void) -Driver_HandleStartDoctypeDecl(void *aUserData, - const XML_Char *aDoctypeName) -{ - NS_ASSERTION(aUserData, "expat driver should exist"); - if (aUserData) { - NS_STATIC_CAST(nsExpatDriver*,aUserData)->HandleStartDoctypeDecl(); - } -} - -PR_STATIC_CALLBACK(void) -Driver_HandleEndDoctypeDecl(void *aUserData) -{ - NS_ASSERTION(aUserData, "expat driver should exist"); - if (aUserData) { - NS_STATIC_CAST(nsExpatDriver*,aUserData)->HandleEndDoctypeDecl(); - } -} - -PR_STATIC_CALLBACK(int) -Driver_HandleExternalEntityRef(void* aExternalEntityRefHandler, - const XML_Char *openEntityNames, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId) -{ - NS_ASSERTION(aExternalEntityRefHandler, "expat driver should exist"); - if (aExternalEntityRefHandler) { - return NS_STATIC_CAST(nsExpatDriver*, - aExternalEntityRefHandler)->HandleExternalEntityRef( - (const PRUnichar*)openEntityNames, (const PRUnichar*)base, - (const PRUnichar*)systemId, (const PRUnichar*)publicId); - } - 
return 1; -} - -/***************************** END CALL BACKS *********************************/ - -/***************************** CATALOG UTILS **********************************/ - -// Initially added for bug 113400 to switch from the remote "XHTML 1.0 plus -// MathML 2.0" DTD to the the lightweight customized version that Mozilla uses. -// Since Mozilla is not validating, no need to fetch a *huge* file at each click. -// XXX The cleanest solution here would be to fix Bug 98413: Implement XML Catalogs -struct nsCatalogData { - const char* mPublicID; - const char* mLocalDTD; - const char* mAgentSheet; -}; - -// The order of this table is guestimated to be in the optimum order -static const nsCatalogData kCatalogTable[] = { - {"-//W3C//DTD XHTML 1.0 Transitional//EN", "xhtml11.dtd", nsnull }, - {"-//W3C//DTD XHTML 1.1//EN", "xhtml11.dtd", nsnull }, - {"-//W3C//DTD XHTML 1.0 Strict//EN", "xhtml11.dtd", nsnull }, - {"-//W3C//DTD XHTML 1.0 Frameset//EN", "xhtml11.dtd", nsnull }, - {"-//W3C//DTD XHTML Basic 1.0//EN", "xhtml11.dtd", nsnull }, - {"-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN", "mathml.dtd", "resource://gre/res/mathml.css" }, - {"-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN", "mathml.dtd", "resource://gre/res/mathml.css" }, - {"-//W3C//DTD MathML 2.0//EN", "mathml.dtd", "resource://gre/res/mathml.css" }, - {"-//W3C//DTD SVG 20001102//EN", "svg.dtd", nsnull }, - {"-//WAPFORUM//DTD XHTML Mobile 1.0//EN", "xhtml11.dtd", nsnull }, - {nsnull, nsnull, nsnull} -}; - -static const nsCatalogData* -LookupCatalogData(const PRUnichar* aPublicID) -{ - nsCAutoString publicID; - publicID.AssignWithConversion(aPublicID); - - // linear search for now since the number of entries is going to - // be negligible, and the fix for bug 98413 would get rid of this - // code anyway - const nsCatalogData* data = kCatalogTable; - while (data->mPublicID) { - if (publicID.Equals(data->mPublicID)) { - return data; - } - ++data; - } - return nsnull; -} - -// aCatalogData can be 
null. If not null, it provides a hook to additional -// built-in knowledge on the resource that we are trying to load. -// aDTD is an in/out parameter. Returns true if the local DTD specified in the -// catalog data exists or if the filename contained within the url exists in -// the special DTD directory. If either of this exists, aDTD is set to the -// file: url that points to the DTD file found in the local DTD directory AND -// the old URI is relased. -static PRBool -IsLoadableDTD(const nsCatalogData* aCatalogData, nsCOMPtr<nsIURI>* aDTD) -{ - PRBool isLoadable = PR_FALSE; - nsresult res = NS_OK; - - if (!aDTD || !*aDTD) { - NS_ASSERTION(0, "Null parameter."); - return PR_FALSE; - } - - nsCAutoString fileName; - if (aCatalogData) { - // remap the DTD to a known local DTD - fileName.Assign(aCatalogData->mLocalDTD); - } - if (fileName.IsEmpty()) { - // try to see if the user has installed the DTD file -- we extract the - // filename.ext of the DTD here. Hence, for any DTD for which we have - // no predefined mapping, users just have to copy the DTD file to our - // special DTD directory and it will be picked - nsCOMPtr<nsIURL> dtdURL; - dtdURL = do_QueryInterface(*aDTD, &res); - if (NS_FAILED(res)) { - return PR_FALSE; - } - res = dtdURL->GetFileName(fileName); - if (NS_FAILED(res) || fileName.IsEmpty()) { - return PR_FALSE; - } - } - - nsCOMPtr<nsIFile> dtdPath; - NS_GetSpecialDirectory(NS_GRE_DIR, - getter_AddRefs(dtdPath)); - - if (!dtdPath) - return PR_FALSE; - - nsCOMPtr<nsILocalFile> lfile = do_QueryInterface(dtdPath); - - // append res/dtd/<fileName> - // can't do AppendRelativeNativePath("res/dtd/" + fileName) - // as that won't work on all platforms. - lfile->AppendNative(NS_LITERAL_CSTRING("res")); - lfile->AppendNative(NS_LITERAL_CSTRING("dtd")); - lfile->AppendNative(fileName); - - PRBool exists; - dtdPath->Exists(&exists); - - if (exists) { - // The DTD was found in the local DTD directory. 
- // Set aDTD to a file: url pointing to the local DT - nsCOMPtr<nsIURI> dtdURI; - NS_NewFileURI(getter_AddRefs(dtdURI), dtdPath); - - if (dtdURI) { - *aDTD = dtdURI; - isLoadable = PR_TRUE; - } - } - - return isLoadable; -} - -/***************************** END CATALOG UTILS ******************************/ - -NS_IMPL_ISUPPORTS2(nsExpatDriver, - nsITokenizer, - nsIDTD) - -nsresult -NS_NewExpatDriver(nsIDTD** aResult) { - nsExpatDriver* driver = nsnull; - NS_NEWXPCOM(driver, nsExpatDriver); - NS_ENSURE_TRUE(driver,NS_ERROR_OUT_OF_MEMORY); - - return driver->QueryInterface(NS_GET_IID(nsIDTD), (void**)aResult); -} - -nsExpatDriver::nsExpatDriver() - :mExpatParser(0), - mInCData(PR_FALSE), - mInDoctype(PR_FALSE), - mInExternalDTD(PR_FALSE), - mHandledXMLDeclaration(PR_FALSE), - mBytePosition(0), - mInternalState(NS_OK), - mBytesParsed(0), - mSink(0), - mCatalogData(nsnull) -{ -} - -nsExpatDriver::~nsExpatDriver() -{ - NS_IF_RELEASE(mSink); - if (mExpatParser) { - XML_ParserFree(mExpatParser); - mExpatParser = nsnull; - } -} - -nsresult -nsExpatDriver::HandleStartElement(const PRUnichar *aValue, - const PRUnichar **aAtts) -{ - NS_ASSERTION(mSink, "content sink not found!"); - - // Calculate the total number of elements in aAtts. - // XML_GetSpecifiedAttributeCount will only give us the number of specified - // attrs (twice that number, actually), so we have to check for default attrs - // ourselves. 
- PRUint32 attrArrayLength; - for (attrArrayLength = XML_GetSpecifiedAttributeCount(mExpatParser); - aAtts[attrArrayLength]; - attrArrayLength += 2) { - // Just looping till we find out what the length is - } - - if (mSink){ - mSink->HandleStartElement(aValue, aAtts, - attrArrayLength, - XML_GetIdAttributeIndex(mExpatParser), - XML_GetCurrentLineNumber(mExpatParser)); - } - return NS_OK; -} - -nsresult -nsExpatDriver::HandleEndElement(const PRUnichar *aValue) -{ - NS_ASSERTION(mSink, "content sink not found!"); - - if (mSink){ - nsresult result = mSink->HandleEndElement(aValue); - if (result == NS_ERROR_HTMLPARSER_BLOCK) { - mInternalState = NS_ERROR_HTMLPARSER_BLOCK; - XML_BlockParser(mExpatParser); - } - } - - return NS_OK; -} - -nsresult -nsExpatDriver::HandleCharacterData(const PRUnichar *aValue, - const PRUint32 aLength) -{ - NS_ASSERTION(mSink, "content sink not found!"); - - if (mInCData) { - mCDataText.Append(aValue,aLength); - } - else if (mSink){ - mInternalState = mSink->HandleCharacterData(aValue, aLength); - } - - return NS_OK; -} - -nsresult -nsExpatDriver::HandleComment(const PRUnichar *aValue) -{ - NS_ASSERTION(mSink, "content sink not found!"); - - if (mInDoctype) { - if (!mInExternalDTD) { - mDoctypeText.Append(aValue); - } - } - else if (mSink){ - mInternalState = mSink->HandleComment(aValue); - } - - return NS_OK; -} - -nsresult -nsExpatDriver::HandleProcessingInstruction(const PRUnichar *aTarget, - const PRUnichar *aData) -{ - NS_ASSERTION(mSink, "content sink not found!"); - - if (mSink){ - nsresult result = mSink->HandleProcessingInstruction(aTarget, aData); - if (result == NS_ERROR_HTMLPARSER_BLOCK) { - mInternalState = NS_ERROR_HTMLPARSER_BLOCK; - XML_BlockParser(mExpatParser); - } - } - - return NS_OK; -} - -nsresult -nsExpatDriver::HandleXMLDeclaration(const PRUnichar *aValue, - const PRUint32 aLength) -{ - mHandledXMLDeclaration = PR_TRUE; - - // <?xml version='a'?> - // 0123456789012345678 - PRUint32 i = 17; // ?> can start at position 
17 at the earliest - for (; i < aLength; ++i) { - if (aValue[i] == '?') - break; - } - - // +1 because index starts from 0 - // +1 because '>' follows '?' - i += 2; - - if (i > aLength) - return NS_OK; // Bad declaration - - return mSink->HandleXMLDeclaration(aValue, i); -} - -nsresult -nsExpatDriver::HandleDefault(const PRUnichar *aValue, - const PRUint32 aLength) -{ - NS_ASSERTION(mSink, "content sink not found!"); - - if (mInDoctype) { - if (!mInExternalDTD) { - mDoctypeText.Append(aValue, aLength); - } - } - else if (mSink) { - if (!mHandledXMLDeclaration && !mBytesParsed) { - static const PRUnichar xmlDecl[] = {'<', '?', 'x', 'm', 'l', ' ', '\0'}; - // strlen("<?xml version='a'?>") == 19, shortest decl - if ((aLength >= 19) && - (nsCRT::strncmp(aValue, xmlDecl, 6) == 0)) { - HandleXMLDeclaration(aValue, aLength); - } - } - - static const PRUnichar newline[] = {'\n','\0'}; - for (PRUint32 i = 0; i < aLength && NS_SUCCEEDED(mInternalState); i++) { - if (aValue[i] == '\n' || aValue[i] == '\r') { - mInternalState = mSink->HandleCharacterData(newline, 1); - } - } - } - - return NS_OK; -} - -nsresult -nsExpatDriver::HandleStartCdataSection() -{ - mInCData = PR_TRUE; - return NS_OK; -} - -nsresult -nsExpatDriver::HandleEndCdataSection() -{ - NS_ASSERTION(mSink, "content sink not found!"); - - mInCData = PR_FALSE; - if (mSink) { - mInternalState = mSink->HandleCDataSection(mCDataText.get(),mCDataText.Length()); - } - mCDataText.Truncate(); - - return NS_OK; -} - -/** - * DOCTYPE declaration is covered with very strict rules, which - * makes our life here simpler because the XML parser has already - * detected errors. The only slightly problematic case is whitespace - * between the tokens. There MUST be whitespace between the tokens - * EXCEPT right before > and [. - * - * We assume the string will not contain the ending '>'. 
- */ -static void -GetDocTypeToken(nsString& aStr, - nsString& aToken, - PRBool aQuotedString) -{ - aStr.Trim(kWhitespace,PR_TRUE,PR_FALSE); // If we don't do this we must look ahead - // before Cut() and adjust the cut amount. - if (aQuotedString) { - PRInt32 endQuote = aStr.FindChar(aStr[0],1); - aStr.Mid(aToken,1,endQuote-1); - aStr.Cut(0,endQuote+1); - } else { - static const char* kDelimiter = " [\r\n\t"; // Optimized for typical cases - PRInt32 tokenEnd = aStr.FindCharInSet(kDelimiter); - if (tokenEnd < 0) { - tokenEnd = aStr.Length(); - } - if (tokenEnd > 0) { - aStr.Left(aToken, tokenEnd); - aStr.Cut(0, tokenEnd); - } - } -} - -nsresult -nsExpatDriver::HandleStartDoctypeDecl() -{ - mInDoctype = PR_TRUE; - // Consuming a huge DOCTYPE translates to numerous - // allocations. In an effort to avoid too many allocations - // setting mDoctypeText's capacity to be 1K ( just a guesstimate! ). - mDoctypeText.SetCapacity(1024); - return NS_OK; -} - -nsresult -nsExpatDriver::HandleEndDoctypeDecl() -{ - NS_ASSERTION(mSink, "content sink not found!"); - - mInDoctype = PR_FALSE; - - if(mSink) { - // let the sink know any additional knowledge that we have about the document - // (currently, from bug 124570, we only expect to pass additional agent sheets - // needed to layout the XML vocabulary of the document) - nsCOMPtr<nsIURI> data; - if (mCatalogData && mCatalogData->mAgentSheet) { - NS_NewURI(getter_AddRefs(data), mCatalogData->mAgentSheet); - } - - nsAutoString name; - GetDocTypeToken(mDoctypeText, name, PR_FALSE); - - nsAutoString token, publicId, systemId; - GetDocTypeToken(mDoctypeText, token, PR_FALSE); - if (token.Equals(NS_LITERAL_STRING("PUBLIC"))) { - GetDocTypeToken(mDoctypeText, publicId, PR_TRUE); - GetDocTypeToken(mDoctypeText, systemId, PR_TRUE); - } - else if (token.Equals(NS_LITERAL_STRING("SYSTEM"))) { - GetDocTypeToken(mDoctypeText, systemId, PR_TRUE); - } - - // The rest is the internal subset with [] (minus whitespace) - 
mDoctypeText.Trim(kWhitespace); - // Take out the brackets too, if any - if (mDoctypeText.Length() > 2) { - const nsAString& internalSubset = Substring(mDoctypeText, 1, - mDoctypeText.Length() - 2); - mInternalState = mSink->HandleDoctypeDecl(internalSubset, - name, - systemId, - publicId, - data); - } else { - // There's nothing but brackets, don't include them - mInternalState = mSink->HandleDoctypeDecl(nsString(),// !internalSubset - name, - systemId, - publicId, - data); - } - - } - - mDoctypeText.SetCapacity(0); - - return NS_OK; -} - -static NS_METHOD -ExternalDTDStreamReaderFunc(nsIUnicharInputStream* aIn, - void* aClosure, - const PRUnichar* aFromSegment, - PRUint32 aToOffset, - PRUint32 aCount, - PRUint32 *aWriteCount) -{ - // Pass the buffer to expat for parsing. XML_Parse returns 0 for - // fatal errors. - if (XML_Parse((XML_Parser)aClosure, (char *)aFromSegment, - aCount * sizeof(PRUnichar), 0)) { - *aWriteCount = aCount; - return NS_OK; - } - *aWriteCount = 0; - return NS_ERROR_FAILURE; -} - -int -nsExpatDriver::HandleExternalEntityRef(const PRUnichar *openEntityNames, - const PRUnichar *base, - const PRUnichar *systemId, - const PRUnichar *publicId) -{ - if (mInDoctype && !mInExternalDTD && openEntityNames) { - mDoctypeText.Append(PRUnichar('%')); - mDoctypeText.Append(nsDependentString(openEntityNames)); - mDoctypeText.Append(PRUnichar(';')); - } - - int result = 1; - - // Load the external entity into a buffer - nsCOMPtr<nsIInputStream> in; - nsAutoString absURL; - - nsresult rv = OpenInputStreamFromExternalDTD(publicId, - systemId, - base, - getter_AddRefs(in), - absURL); - - if (NS_FAILED(rv)) { - return result; - } - - nsCOMPtr<nsIUnicharInputStream> uniIn; - - rv = NS_NewUTF8ConverterStream(getter_AddRefs(uniIn), in, 1024); - - if (NS_FAILED(rv)) { - return result; - } - - if (uniIn) { - XML_Parser entParser = - XML_ExternalEntityParserCreate( - mExpatParser, - 0, - (const XML_Char*) NS_LITERAL_STRING("UTF-16").get()); - - if (entParser) { - 
XML_SetBase(entParser, (const XML_Char*) absURL.get()); - - mInExternalDTD = PR_TRUE; - - PRUint32 totalRead; - do { - rv = uniIn->ReadSegments(ExternalDTDStreamReaderFunc, - (void*)entParser, PRUint32(-1), &totalRead); - } while (NS_SUCCEEDED(rv) && totalRead > 0); - - result = XML_Parse(entParser, nsnull, 0, 1); - - mInExternalDTD = PR_FALSE; - - XML_ParserFree(entParser); - } - } - - return result; -} - -nsresult -nsExpatDriver::OpenInputStreamFromExternalDTD(const PRUnichar* aFPIStr, - const PRUnichar* aURLStr, - const PRUnichar* aBaseURL, - nsIInputStream** in, - nsAString& aAbsURL) -{ - nsresult rv; - nsCOMPtr<nsIURI> baseURI; - rv = NS_NewURI(getter_AddRefs(baseURI), NS_ConvertUTF16toUTF8(aBaseURL)); - if (NS_SUCCEEDED(rv) && baseURI) { - nsCOMPtr<nsIURI> uri; - rv = NS_NewURI(getter_AddRefs(uri), NS_ConvertUTF16toUTF8(aURLStr), nsnull, - baseURI); - if (NS_SUCCEEDED(rv) && uri) { - // check if it is alright to load this uri - PRBool isChrome = PR_FALSE; - uri->SchemeIs("chrome", &isChrome); - if (!isChrome) { - // since the url is not a chrome url, check to see if we can map the DTD - // to a known local DTD, or if a DTD file of the same name exists in the - // special DTD directory - if (aFPIStr) { - // see if the Formal Public Identifier (FPI) maps to a catalog entry - mCatalogData = LookupCatalogData(aFPIStr); - } - if (!IsLoadableDTD(mCatalogData, address_of(uri))) - return NS_ERROR_NOT_IMPLEMENTED; - } - rv = NS_OpenURI(in, uri); - nsCAutoString absURL; - uri->GetSpec(absURL); - CopyUTF8toUTF16(absURL, aAbsURL); - } - } - return rv; -} - -static nsresult -CreateErrorText(const PRUnichar* aDescription, - const PRUnichar* aSourceURL, - const PRInt32 aLineNumber, - const PRInt32 aColNumber, - nsString& aErrorString) -{ - aErrorString.Truncate(); - - nsAutoString msg; - nsresult rv = nsParserMsgUtils::GetLocalizedStringByName(XMLPARSER_PROPERTIES,"XMLParsingError",msg); - if (NS_FAILED(rv)) { - return rv; - } - - // XML Parsing Error: %1$S\nLocation: 
%2$S\nLine Number %3$d, Column %4$d: - PRUnichar *message = nsTextFormatter::smprintf(msg.get(),aDescription,aSourceURL,aLineNumber,aColNumber); - if (!message) { - return NS_ERROR_OUT_OF_MEMORY; - } - aErrorString.Assign(message); - nsTextFormatter::smprintf_free(message); - - return NS_OK; -} - -static nsresult -CreateSourceText(const PRInt32 aColNumber, - const PRUnichar* aSourceLine, - nsString& aSourceString) -{ - PRInt32 errorPosition = aColNumber; - - aSourceString.Append(aSourceLine); - aSourceString.Append(PRUnichar('\n')); - for (PRInt32 i = 0; i < errorPosition - 1; ++i) { - aSourceString.Append(PRUnichar('-')); - } - aSourceString.Append(PRUnichar('^')); - - return NS_OK; -} - -nsresult -nsExpatDriver::HandleError(const char *aBuffer, - PRUint32 aLength, - PRBool aIsFinal) -{ - - PRInt32 code = XML_GetErrorCode(mExpatParser); - NS_WARN_IF_FALSE(code >= 1, "unexpected XML error code"); - - // Map Expat error code to an error string - // XXX Deal with error returns. - nsAutoString description; - nsParserMsgUtils::GetLocalizedStringByID(XMLPARSER_PROPERTIES, code, description); - - if (code == XML_ERROR_TAG_MISMATCH) { - nsAutoString msg; - nsParserMsgUtils::GetLocalizedStringByName(XMLPARSER_PROPERTIES, "Expected", msg); - // . Expected: </%S>. - PRUnichar *message = nsTextFormatter::smprintf(msg.get(), (const PRUnichar*)XML_GetMismatchedTag(mExpatParser)); - if (!message) { - return NS_ERROR_OUT_OF_MEMORY; - } - description.Append(message); - nsTextFormatter::smprintf_free(message); - } - - nsAutoString sourceLine; - if (!aIsFinal) { - GetLine(aBuffer, aLength, (XML_GetCurrentByteIndex(mExpatParser) - mBytesParsed), sourceLine); - } - else { - sourceLine.Append(mLastLine); - } - - // Adjust the column number so that it is one based rather than zero based. 
- PRInt32 colNumber = XML_GetCurrentColumnNumber(mExpatParser) + 1; - - nsAutoString errorText; - CreateErrorText(description.get(), - (PRUnichar*)XML_GetBase(mExpatParser), - XML_GetCurrentLineNumber(mExpatParser), - colNumber, errorText); - - nsAutoString sourceText; - CreateSourceText(colNumber, sourceLine.get(), sourceText); - - NS_ASSERTION(mSink,"no sink?"); - if (mSink) { - mSink->ReportError(errorText.get(), sourceText.get()); - } - - return NS_ERROR_HTMLPARSER_STOPPARSING; -} - -nsresult -nsExpatDriver::ParseBuffer(const char* aBuffer, - PRUint32 aLength, - PRBool aIsFinal) -{ - nsresult result = NS_OK; - NS_ASSERTION((aBuffer && aLength) || (aBuffer == nsnull && aLength == 0), "?"); - - if (mExpatParser && mInternalState == NS_OK) { - if (!XML_Parse(mExpatParser, aBuffer, aLength, aIsFinal)) { - if (mInternalState == NS_ERROR_HTMLPARSER_BLOCK || - mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) { - mBytePosition = (XML_GetCurrentByteIndex(mExpatParser) - mBytesParsed); - mBytesParsed += mBytePosition; - } - else { - HandleError(aBuffer,aLength,aIsFinal); - mInternalState = NS_ERROR_HTMLPARSER_STOPPARSING; - } - return mInternalState; - } - else if (aBuffer && aLength) { - // Cache the last line in the buffer - GetLine(aBuffer, aLength, aLength - sizeof(PRUnichar), mLastLine); - } - mBytesParsed += aLength; - mBytePosition = 0; - } - - return result; -} - -void -nsExpatDriver::GetLine(const char* aSourceBuffer, - PRUint32 aLength, - PRUint32 aOffset, - nsString& aLine) -{ - /* Figure out the line inside aSourceBuffer that contains character specified by aOffset. - Copy it into aLine. 
*/ - NS_ASSERTION(aOffset >= 0 && aOffset < aLength, "?"); - /* Assert that the byteIndex and the length of the buffer is even */ - NS_ASSERTION(aOffset % 2 == 0 && aLength % 2 == 0, "?"); - PRUnichar* start = (PRUnichar* ) &aSourceBuffer[aOffset]; /* Will try to find the start of the line */ - PRUnichar* end = (PRUnichar* ) &aSourceBuffer[aOffset]; /* Will try to find the end of the line */ - PRUint32 startIndex = aOffset / sizeof(PRUnichar); /* Track the position of the 'start' pointer into the buffer */ - PRUint32 endIndex = aOffset / sizeof(PRUnichar); /* Track the position of the 'end' pointer into the buffer */ - PRUint32 numCharsInBuffer = aLength / sizeof(PRUnichar); - PRBool reachedStart; - PRBool reachedEnd; - - - /* Use start to find the first new line before the error position and - end to find the first new line after the error position */ - reachedStart = (startIndex <= 0 || '\n' == *start || '\r' == *start); - reachedEnd = (endIndex >= numCharsInBuffer || '\n' == *end || '\r' == *end); - while (!reachedStart || !reachedEnd) { - if (!reachedStart) { - --start; - --startIndex; - reachedStart = (startIndex <= 0 || '\n' == *start || '\r' == *start); - } - if (!reachedEnd) { - ++end; - ++endIndex; - reachedEnd = (endIndex >= numCharsInBuffer || '\n' == *end || '\r' == *end); - } - } - - aLine.Truncate(0); - if (startIndex == endIndex) { - // Special case if the error is on a line where the only character is a newline. - // Do nothing - } - else { - NS_ASSERTION(endIndex - startIndex >= sizeof(PRUnichar), "?"); - /* At this point, there are two cases. Either the error is on the first line or - on subsequent lines. If the error is on the first line, startIndex will decrement - all the way to zero. If not, startIndex will decrement to the position of the - newline character on the previous line. So, in the first case, the start position - of the error line = startIndex (== 0). In the second case, the start position of the - error line = startIndex + 1. 
In both cases, the end position of the error line will be - (endIndex - 1). */ - PRUint32 startPosn = (startIndex <= 0) ? startIndex : startIndex + 1; - - /* At this point, the substring starting at startPosn and ending at (endIndex - 1), - is the line on which the error occurred. Copy that substring into the error structure. */ - const PRUnichar* unicodeBuffer = (const PRUnichar*) aSourceBuffer; - aLine.Append(&unicodeBuffer[startPosn], endIndex - startPosn); - } -} - - -NS_IMETHODIMP -nsExpatDriver::CreateNewInstance(nsIDTD** aInstancePtrResult) -{ - return NS_NewExpatDriver(aInstancePtrResult); -} - -NS_IMETHODIMP -nsExpatDriver::ConsumeToken(nsScanner& aScanner, - PRBool& aFlushTokens) -{ - // Ask the scanner to send us all the data it has - // scanned and pass that data to expat. - - mInternalState = NS_OK; // Resume in case we're blocked. - XML_UnblockParser(mExpatParser); - - nsScannerIterator start, end; - aScanner.CurrentPosition(start); - aScanner.EndReading(end); - - while (start != end) { - PRUint32 fragLength = PRUint32(start.size_forward()); - - mInternalState = ParseBuffer((const char *)start.get(), - fragLength * sizeof(PRUnichar), - aFlushTokens); - - if (NS_FAILED(mInternalState)) { - if (mInternalState == NS_ERROR_HTMLPARSER_BLOCK) { - // mBytePosition / 2 => character position. Since one char = two bytes. 
- aScanner.SetPosition(start.advance(mBytePosition / 2), PR_TRUE); - aScanner.Mark(); - } - return mInternalState; - } - - start.advance(fragLength); - } - - aScanner.SetPosition(end, PR_TRUE); - - if(NS_SUCCEEDED(mInternalState)) { - return aScanner.Eof(); - } - - return NS_OK; -} - -NS_IMETHODIMP_(eAutoDetectResult) -nsExpatDriver::CanParse(CParserContext& aParserContext, - const nsString& aBuffer, - PRInt32 aVersion) -{ - eAutoDetectResult result = eUnknownDetect; - - if (eViewSource != aParserContext.mParserCommand) { - if (aParserContext.mMimeType.Equals(kXMLTextContentType) || - aParserContext.mMimeType.Equals(kXMLApplicationContentType) || - aParserContext.mMimeType.Equals(kXHTMLApplicationContentType)|| - aParserContext.mMimeType.Equals(kRDFTextContentType) || -#ifdef MOZ_SVG - aParserContext.mMimeType.Equals(kSVGTextContentType) || -#endif - aParserContext.mMimeType.Equals(kXULTextContentType)) { - result=ePrimaryDetect; - } - else { - if (aParserContext.mMimeType.IsEmpty() && - kNotFound != aBuffer.Find("<?xml ")) { - aParserContext.SetMimeType(NS_LITERAL_CSTRING(kXMLTextContentType)); - result=eValidDetect; - } - } - } - - return result; -} - -NS_IMETHODIMP -nsExpatDriver::WillBuildModel(const CParserContext& aParserContext, - nsITokenizer* aTokenizer, - nsIContentSink* aSink) -{ - - NS_ENSURE_ARG_POINTER(aSink); - - aSink->QueryInterface(NS_GET_IID(nsIExpatSink),(void**)&(mSink)); - NS_ENSURE_TRUE(mSink,NS_ERROR_FAILURE); - - mExpatParser = XML_ParserCreate((const XML_Char*) NS_LITERAL_STRING("UTF-16").get()); - NS_ENSURE_TRUE(mExpatParser, NS_ERROR_FAILURE); - -#ifdef XML_DTD - XML_SetParamEntityParsing(mExpatParser, XML_PARAM_ENTITY_PARSING_ALWAYS); -#endif - - XML_SetBase(mExpatParser, (const XML_Char*) (aParserContext.mScanner->GetFilename()).get()); - - // Set up the callbacks - XML_SetElementHandler(mExpatParser, Driver_HandleStartElement, Driver_HandleEndElement); - XML_SetCharacterDataHandler(mExpatParser, Driver_HandleCharacterData); - 
XML_SetProcessingInstructionHandler(mExpatParser, Driver_HandleProcessingInstruction); - XML_SetDefaultHandlerExpand(mExpatParser, Driver_HandleDefault); - XML_SetExternalEntityRefHandler(mExpatParser, Driver_HandleExternalEntityRef); - XML_SetExternalEntityRefHandlerArg(mExpatParser, this); - XML_SetCommentHandler(mExpatParser, Driver_HandleComment); - XML_SetCdataSectionHandler(mExpatParser, Driver_HandleStartCdataSection, - Driver_HandleEndCdataSection); - - XML_SetParamEntityParsing(mExpatParser, XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); - XML_SetDoctypeDeclHandler(mExpatParser, Driver_HandleStartDoctypeDecl, Driver_HandleEndDoctypeDecl); - - // Set up the user data. - XML_SetUserData(mExpatParser, this); - - return aSink->WillBuildModel(); -} - -NS_IMETHODIMP -nsExpatDriver::BuildModel(nsIParser* aParser, - nsITokenizer* aTokenizer, - nsITokenObserver* anObserver, - nsIContentSink* aSink) -{ - return mInternalState; -} - -NS_IMETHODIMP -nsExpatDriver::DidBuildModel(nsresult anErrorCode, - PRBool aNotifySink, - nsIParser* aParser, - nsIContentSink* aSink) -{ - // Check for mSink is intentional. This would make sure - // that DidBuildModel() is called only once on the sink. - nsresult result = NS_OK; - if (mSink) { - result = aSink->DidBuildModel(); - NS_RELEASE(mSink); // assigns null - } - return result; -} - -NS_IMETHODIMP -nsExpatDriver::WillTokenize(PRBool aIsFinalChunk, - nsTokenAllocator* aTokenAllocator) -{ - return NS_OK; -} - -NS_IMETHODIMP -nsExpatDriver::WillResumeParse(nsIContentSink* aSink) -{ - return (aSink)? aSink->WillResume():NS_OK; -} - -NS_IMETHODIMP -nsExpatDriver::WillInterruptParse(nsIContentSink* aSink) -{ - return (aSink)? 
aSink->WillInterrupt():NS_OK; -} - -NS_IMETHODIMP -nsExpatDriver::DidTokenize(PRBool aIsFinalChunk) -{ - return ParseBuffer(nsnull, 0, aIsFinalChunk); -} - -NS_IMETHODIMP_(const nsIID&) -nsExpatDriver::GetMostDerivedIID(void) const -{ - return NS_GET_IID(nsIDTD); -} - -NS_IMETHODIMP_(void) -nsExpatDriver::Terminate() -{ - XML_BlockParser(mExpatParser); // XXX - not sure what happens to the unparsed data. - mInternalState = NS_ERROR_HTMLPARSER_STOPPARSING; -} - -NS_IMETHODIMP_(PRInt32) -nsExpatDriver::GetType() -{ - return NS_IPARSER_FLAG_XML; -} - -/*************************** Unused methods ***************************************/ - -NS_IMETHODIMP -nsExpatDriver::CollectSkippedContent(PRInt32 aTag, nsAString& aContent, PRInt32 &aLineNo) -{ - return NS_OK; -} - -NS_IMETHODIMP_(CToken*) -nsExpatDriver::PushTokenFront(CToken* aToken) -{ - return 0; -} - -NS_IMETHODIMP_(CToken*) -nsExpatDriver::PushToken(CToken* aToken) -{ - return 0; -} - -NS_IMETHODIMP_(CToken*) -nsExpatDriver::PopToken(void) -{ - return 0; -} - -NS_IMETHODIMP_(CToken*) -nsExpatDriver::PeekToken(void) -{ - return 0; -} - -NS_IMETHODIMP_(CToken*) -nsExpatDriver::GetTokenAt(PRInt32 anIndex) -{ - return 0; -} - -NS_IMETHODIMP_(PRInt32) -nsExpatDriver::GetCount(void) -{ - return 0; -} - -NS_IMETHODIMP_(nsTokenAllocator*) -nsExpatDriver::GetTokenAllocator(void) -{ - return 0; -} - -NS_IMETHODIMP_(void) -nsExpatDriver::PrependTokens(nsDeque& aDeque) -{ - -} - -NS_IMETHODIMP -nsExpatDriver::CopyState(nsITokenizer* aTokenizer) -{ - return NS_OK; -} - -NS_IMETHODIMP -nsExpatDriver::HandleToken(CToken* aToken,nsIParser* aParser) -{ - return NS_OK; -} - -NS_IMETHODIMP_(PRBool) -nsExpatDriver::IsBlockElement(PRInt32 aTagID,PRInt32 aParentID) const -{ - return PR_FALSE; -} - -NS_IMETHODIMP_(PRBool) -nsExpatDriver::IsInlineElement(PRInt32 aTagID,PRInt32 aParentID) const -{ - return PR_FALSE; -} - -NS_IMETHODIMP_(PRBool) -nsExpatDriver::IsContainer(PRInt32 aTag) const -{ - return PR_TRUE; -} - 
-NS_IMETHODIMP_(PRBool) -nsExpatDriver::CanContain(PRInt32 aParent,PRInt32 aChild) const -{ - return PR_TRUE; -} - -NS_IMETHODIMP -nsExpatDriver::StringTagToIntTag(const nsAString &aTag, PRInt32* aIntTag) const -{ - return NS_OK; -} - -NS_IMETHODIMP_(const PRUnichar *) -nsExpatDriver::IntTagToStringTag(PRInt32 aIntTag) const -{ - return 0; -} - -NS_IMETHODIMP_(nsIAtom *) -nsExpatDriver::IntTagToAtom(PRInt32 aIntTag) const -{ - return 0; -} - -/******************************************************************************/ diff --git a/htmlparser/src/nsExpatDriver.h b/htmlparser/src/nsExpatDriver.h deleted file mode 100644 index 9eddf9f34d92..000000000000 --- a/htmlparser/src/nsExpatDriver.h +++ /dev/null @@ -1,107 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef NS_EXPAT_DRIVER__ -#define NS_EXPAT_DRIVER__ - -#include "xmlparse.h" -#include "nsString.h" -#include "nsIDTD.h" -#include "nsITokenizer.h" -#include "nsIInputStream.h" - -class nsIExpatSink; -struct nsCatalogData; - -class nsExpatDriver : public nsIDTD, - public nsITokenizer -{ -public: - NS_DECL_ISUPPORTS - NS_DECL_NSIDTD - NS_DECL_NSITOKENIZER - - nsExpatDriver(); - virtual ~nsExpatDriver(); - - int HandleExternalEntityRef(const PRUnichar *openEntityNames, - const PRUnichar *base, - const PRUnichar *systemId, - const PRUnichar *publicId); - nsresult HandleStartElement(const PRUnichar *aName, const PRUnichar **aAtts); - nsresult HandleEndElement(const PRUnichar *aName); - nsresult HandleCharacterData(const PRUnichar *aCData, const PRUint32 aLength); - nsresult HandleComment(const PRUnichar *aName); - nsresult HandleProcessingInstruction(const PRUnichar *aTarget, const PRUnichar *aData); - nsresult HandleXMLDeclaration(const PRUnichar *aData, const PRUint32 aLength); - nsresult HandleDefault(const PRUnichar *aData, const PRUint32 aLength); - nsresult 
HandleStartCdataSection(); - nsresult HandleEndCdataSection(); - nsresult HandleStartDoctypeDecl(); - nsresult HandleEndDoctypeDecl(); - -protected: - - // Load up an external stream to get external entity information - nsresult OpenInputStreamFromExternalDTD(const PRUnichar* aFPIStr, - const PRUnichar* aURLStr, - const PRUnichar* aBaseURL, - nsIInputStream** in, - nsAString& aAbsURL); - - nsresult ParseBuffer(const char* aBuffer, PRUint32 aLength, PRBool aIsFinal); - nsresult HandleError(const char *aBuffer, PRUint32 aLength, PRBool aIsFinal); - void GetLine(const char* aSourceBuffer, PRUint32 aLength, PRUint32 aOffset, nsString& aLine); - - XML_Parser mExpatParser; - nsString mLastLine; - nsString mCDataText; - nsString mDoctypeText; - PRPackedBool mInCData; - PRPackedBool mInDoctype; - PRPackedBool mInExternalDTD; - PRPackedBool mHandledXMLDeclaration; - PRInt32 mBytePosition; - nsresult mInternalState; - PRUint32 mBytesParsed; - nsIExpatSink* mSink; - const nsCatalogData* mCatalogData; // weak - -}; -nsresult NS_NewExpatDriver(nsIDTD** aDriver); - -#endif diff --git a/htmlparser/src/nsHTMLEntities.cpp b/htmlparser/src/nsHTMLEntities.cpp deleted file mode 100644 index fa253820e40f..000000000000 --- a/htmlparser/src/nsHTMLEntities.cpp +++ /dev/null @@ -1,280 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. 
- * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -#include "nsHTMLEntities.h" - - - -#include "nsString.h" -#include "nsCRT.h" -#include "prtypes.h" -#include "pldhash.h" - -struct EntityNode { - const char* mStr; // never owns buffer - PRInt32 mUnicode; -}; - -struct EntityNodeEntry : public PLDHashEntryHdr -{ - const EntityNode* node; -}; - -PR_STATIC_CALLBACK(const void*) - getStringKey(PLDHashTable*, PLDHashEntryHdr* aHdr) -{ - const EntityNodeEntry* entry = NS_STATIC_CAST(const EntityNodeEntry*, aHdr); - return entry->node->mStr; -} - -PR_STATIC_CALLBACK(const void*) - getUnicodeKey(PLDHashTable*, PLDHashEntryHdr* aHdr) -{ - const EntityNodeEntry* entry = NS_STATIC_CAST(const EntityNodeEntry*, aHdr); - return NS_INT32_TO_PTR(entry->node->mUnicode); -} - -PR_STATIC_CALLBACK(PRBool) - matchNodeString(PLDHashTable*, const PLDHashEntryHdr* aHdr, - const void* key) -{ - const EntityNodeEntry* entry = NS_STATIC_CAST(const EntityNodeEntry*, aHdr); - const char* str = NS_STATIC_CAST(const char*, key); - return (nsCRT::strcmp(entry->node->mStr, str) == 0); -} - -PR_STATIC_CALLBACK(PRBool) - matchNodeUnicode(PLDHashTable*, const PLDHashEntryHdr* aHdr, - const void* key) -{ - const EntityNodeEntry* entry = NS_STATIC_CAST(const EntityNodeEntry*, aHdr); - const PRInt32 ucode = NS_PTR_TO_INT32(key); - return (entry->node->mUnicode == ucode); -} - -PR_STATIC_CALLBACK(PLDHashNumber) - hashUnicodeValue(PLDHashTable*, const void* key) -{ - // key is actually the unicode value - return PLDHashNumber(NS_PTR_TO_INT32(key)); - } - - -static const PLDHashTableOps EntityToUnicodeOps = { - PL_DHashAllocTable, - PL_DHashFreeTable, - getStringKey, - PL_DHashStringKey, - matchNodeString, - PL_DHashMoveEntryStub, - PL_DHashClearEntryStub, - PL_DHashFinalizeStub, - nsnull, -}; - -static const PLDHashTableOps UnicodeToEntityOps = { - PL_DHashAllocTable, - PL_DHashFreeTable, - getUnicodeKey, - hashUnicodeValue, - matchNodeUnicode, - PL_DHashMoveEntryStub, - PL_DHashClearEntryStub, - 
PL_DHashFinalizeStub, - nsnull, -}; - -static PLDHashTable gEntityToUnicode = { 0 }; -static PLDHashTable gUnicodeToEntity = { 0 }; -static nsrefcnt gTableRefCnt = 0; - -#define HTML_ENTITY(_name, _value) { #_name, _value }, -static const EntityNode gEntityArray[] = { -#include "nsHTMLEntityList.h" -}; -#undef HTML_ENTITY - -#define NS_HTML_ENTITY_COUNT ((PRInt32)NS_ARRAY_LENGTH(gEntityArray)) - -nsresult -nsHTMLEntities::AddRefTable(void) -{ - if (!gTableRefCnt) { - if (!PL_DHashTableInit(&gEntityToUnicode, &EntityToUnicodeOps, - nsnull, sizeof(EntityNodeEntry), - PRUint32(NS_HTML_ENTITY_COUNT / 0.75))) { - gEntityToUnicode.ops = nsnull; - return NS_ERROR_OUT_OF_MEMORY; - } - if (!PL_DHashTableInit(&gUnicodeToEntity, &UnicodeToEntityOps, - nsnull, sizeof(EntityNodeEntry), - PRUint32(NS_HTML_ENTITY_COUNT / 0.75))) { - PL_DHashTableFinish(&gEntityToUnicode); - gEntityToUnicode.ops = gUnicodeToEntity.ops = nsnull; - return NS_ERROR_OUT_OF_MEMORY; - } - for (const EntityNode *node = gEntityArray, - *node_end = gEntityArray + NS_ARRAY_LENGTH(gEntityArray); - node < node_end; ++node) { - - // add to Entity->Unicode table - EntityNodeEntry* entry = - NS_STATIC_CAST(EntityNodeEntry*, - PL_DHashTableOperate(&gEntityToUnicode, - node->mStr, - PL_DHASH_ADD)); - NS_ASSERTION(entry, "Error adding an entry"); - // Prefer earlier entries when we have duplication. - if (!entry->node) - entry->node = node; - - // add to Unicode->Entity table - entry = NS_STATIC_CAST(EntityNodeEntry*, - PL_DHashTableOperate(&gUnicodeToEntity, - NS_INT32_TO_PTR(node->mUnicode), - PL_DHASH_ADD)); - NS_ASSERTION(entry, "Error adding an entry"); - // Prefer earlier entries when we have duplication. 
- if (!entry->node) - entry->node = node; - } - } - ++gTableRefCnt; - return NS_OK; -} - -void -nsHTMLEntities::ReleaseTable(void) -{ - if (--gTableRefCnt != 0) - return; - - if (gEntityToUnicode.ops) { - PL_DHashTableFinish(&gEntityToUnicode); - gEntityToUnicode.ops = nsnull; - } - if (gUnicodeToEntity.ops) { - PL_DHashTableFinish(&gUnicodeToEntity); - gUnicodeToEntity.ops = nsnull; - } - -} - -PRInt32 -nsHTMLEntities::EntityToUnicode(const nsCString& aEntity) -{ - NS_ASSERTION(gEntityToUnicode.ops, "no lookup table, needs addref"); - if (!gEntityToUnicode.ops) - return -1; - - //this little piece of code exists because entities may or may not have the terminating ';'. - //if we see it, strip if off for this test... - - if(';'==aEntity.Last()) { - nsCAutoString temp(aEntity); - temp.Truncate(aEntity.Length()-1); - return EntityToUnicode(temp); - } - - EntityNodeEntry* entry = - NS_STATIC_CAST(EntityNodeEntry*, - PL_DHashTableOperate(&gEntityToUnicode, aEntity.get(), PL_DHASH_LOOKUP)); - - if (!entry || PL_DHASH_ENTRY_IS_FREE(entry)) - return -1; - - return entry->node->mUnicode; -} - - -PRInt32 -nsHTMLEntities::EntityToUnicode(const nsAString& aEntity) { - nsCAutoString theEntity; theEntity.AssignWithConversion(aEntity); - if(';'==theEntity.Last()) { - theEntity.Truncate(theEntity.Length()-1); - } - - return EntityToUnicode(theEntity); -} - - -const char* -nsHTMLEntities::UnicodeToEntity(PRInt32 aUnicode) -{ - NS_ASSERTION(gUnicodeToEntity.ops, "no lookup table, needs addref"); - EntityNodeEntry* entry = - NS_STATIC_CAST(EntityNodeEntry*, - PL_DHashTableOperate(&gUnicodeToEntity, NS_INT32_TO_PTR(aUnicode), PL_DHASH_LOOKUP)); - - if (!entry || PL_DHASH_ENTRY_IS_FREE(entry)) - return nsnull; - - return entry->node->mStr; -} - -#ifdef NS_DEBUG -#include <stdio.h> - -class nsTestEntityTable { -public: - nsTestEntityTable() { - PRInt32 value; - nsHTMLEntities::AddRefTable(); - - // Make sure we can find everything we are supposed to - for (int i = 0; i < 
NS_HTML_ENTITY_COUNT; ++i) { - nsAutoString entity; entity.AssignWithConversion(gEntityArray[i].mStr); - - value = nsHTMLEntities::EntityToUnicode(entity); - NS_ASSERTION(value != -1, "can't find entity"); - NS_ASSERTION(value == gEntityArray[i].mUnicode, "bad unicode value"); - - entity.AssignWithConversion(nsHTMLEntities::UnicodeToEntity(value)); - NS_ASSERTION(entity.EqualsWithConversion(gEntityArray[i].mStr), "bad entity name"); - } - - // Make sure we don't find things that aren't there - value = nsHTMLEntities::EntityToUnicode(nsCAutoString("@")); - NS_ASSERTION(value == -1, "found @"); - value = nsHTMLEntities::EntityToUnicode(nsCAutoString("zzzzz")); - NS_ASSERTION(value == -1, "found zzzzz"); - nsHTMLEntities::ReleaseTable(); - } -}; -//nsTestEntityTable validateEntityTable; -#endif - diff --git a/htmlparser/src/nsHTMLEntities.h b/htmlparser/src/nsHTMLEntities.h deleted file mode 100644 index c602c08b2b8b..000000000000 --- a/htmlparser/src/nsHTMLEntities.h +++ /dev/null @@ -1,67 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -#ifndef nsHTMLEntities_h___ -#define nsHTMLEntities_h___ - -#include "nsString.h" - -class nsHTMLEntities { -public: - - static nsresult AddRefTable(void); - static void ReleaseTable(void); - -/** - * Translate an entity string into it's unicode value. This call - * returns -1 if the entity cannot be mapped. Note that the string - * passed in must NOT have the leading "&" nor the trailing ";" - * in it. - */ - static PRInt32 EntityToUnicode(const nsAString& aEntity); - static PRInt32 EntityToUnicode(const nsCString& aEntity); - -/** - * Translate a unicode value into an entity string. This call - * returns null if the entity cannot be mapped. - * Note that the string returned DOES NOT have the leading "&" nor - * the trailing ";" in it. 
- */ - static const char* UnicodeToEntity(PRInt32 aUnicode); -}; - - -#endif /* nsHTMLEntities_h___ */ diff --git a/htmlparser/src/nsHTMLEntityList.h b/htmlparser/src/nsHTMLEntityList.h deleted file mode 100644 index cd42ae757089..000000000000 --- a/htmlparser/src/nsHTMLEntityList.h +++ /dev/null @@ -1,333 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1999 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. 
If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -/****** - - This file contains the list of all HTML entities - See nsHTMLEntities.h for access to the enum values for entities - - It is designed to be used as inline input to nsHTMLEntities.cpp *only* - through the magic of C preprocessing. - - All entires must be enclosed in the macro HTML_ENTITY which will have cruel - and unusual things done to it - - It is recommended (but not strictly necessary) to keep all entries - in alphabetical order - - The first argument to HTML_ENTITY is the string value of the entity - The second argument it HTML_ENTITY is the unicode value of the entity - - ******/ - -// ISO 8859-1 entities. -// See the HTML4.0 spec for this list in it's DTD form -HTML_ENTITY(nbsp, 160) -HTML_ENTITY(iexcl, 161) -HTML_ENTITY(cent, 162) -HTML_ENTITY(pound, 163) -HTML_ENTITY(curren, 164) -HTML_ENTITY(yen, 165) -HTML_ENTITY(brvbar, 166) -HTML_ENTITY(sect, 167) -HTML_ENTITY(uml, 168) -HTML_ENTITY(copy, 169) -HTML_ENTITY(ordf, 170) -HTML_ENTITY(laquo, 171) -HTML_ENTITY(not, 172) -HTML_ENTITY(shy, 173) -HTML_ENTITY(reg, 174) -HTML_ENTITY(macr, 175) -HTML_ENTITY(deg, 176) -HTML_ENTITY(plusmn, 177) -HTML_ENTITY(sup2, 178) -HTML_ENTITY(sup3, 179) -HTML_ENTITY(acute, 180) -HTML_ENTITY(micro, 181) -HTML_ENTITY(para, 182) -HTML_ENTITY(middot, 183) -HTML_ENTITY(cedil, 184) -HTML_ENTITY(sup1, 185) -HTML_ENTITY(ordm, 186) -HTML_ENTITY(raquo, 187) -HTML_ENTITY(frac14, 188) -HTML_ENTITY(frac12, 189) -HTML_ENTITY(frac34, 190) -HTML_ENTITY(iquest, 191) -HTML_ENTITY(Agrave, 192) -HTML_ENTITY(Aacute, 193) -HTML_ENTITY(Acirc, 194) -HTML_ENTITY(Atilde, 195) -HTML_ENTITY(Auml, 196) -HTML_ENTITY(Aring, 197) -HTML_ENTITY(AElig, 198) -HTML_ENTITY(Ccedil, 199) -HTML_ENTITY(Egrave, 200) -HTML_ENTITY(Eacute, 201) -HTML_ENTITY(Ecirc, 202) -HTML_ENTITY(Euml, 203) -HTML_ENTITY(Igrave, 
204) -HTML_ENTITY(Iacute, 205) -HTML_ENTITY(Icirc, 206) -HTML_ENTITY(Iuml, 207) -HTML_ENTITY(ETH, 208) -HTML_ENTITY(Ntilde, 209) -HTML_ENTITY(Ograve, 210) -HTML_ENTITY(Oacute, 211) -HTML_ENTITY(Ocirc, 212) -HTML_ENTITY(Otilde, 213) -HTML_ENTITY(Ouml, 214) -HTML_ENTITY(times, 215) -HTML_ENTITY(Oslash, 216) -HTML_ENTITY(Ugrave, 217) -HTML_ENTITY(Uacute, 218) -HTML_ENTITY(Ucirc, 219) -HTML_ENTITY(Uuml, 220) -HTML_ENTITY(Yacute, 221) -HTML_ENTITY(THORN, 222) -HTML_ENTITY(szlig, 223) -HTML_ENTITY(agrave, 224) -HTML_ENTITY(aacute, 225) -HTML_ENTITY(acirc, 226) -HTML_ENTITY(atilde, 227) -HTML_ENTITY(auml, 228) -HTML_ENTITY(aring, 229) -HTML_ENTITY(aelig, 230) -HTML_ENTITY(ccedil, 231) -HTML_ENTITY(egrave, 232) -HTML_ENTITY(eacute, 233) -HTML_ENTITY(ecirc, 234) -HTML_ENTITY(euml, 235) -HTML_ENTITY(igrave, 236) -HTML_ENTITY(iacute, 237) -HTML_ENTITY(icirc, 238) -HTML_ENTITY(iuml, 239) -HTML_ENTITY(eth, 240) -HTML_ENTITY(ntilde, 241) -HTML_ENTITY(ograve, 242) -HTML_ENTITY(oacute, 243) -HTML_ENTITY(ocirc, 244) -HTML_ENTITY(otilde, 245) -HTML_ENTITY(ouml, 246) -HTML_ENTITY(divide, 247) -HTML_ENTITY(oslash, 248) -HTML_ENTITY(ugrave, 249) -HTML_ENTITY(uacute, 250) -HTML_ENTITY(ucirc, 251) -HTML_ENTITY(uuml, 252) -HTML_ENTITY(yacute, 253) -HTML_ENTITY(thorn, 254) -HTML_ENTITY(yuml, 255) - -// Symbols, mathematical symbols and Greek letters -// See the HTML4.0 spec for this list in it's DTD form -HTML_ENTITY(fnof, 402) -HTML_ENTITY(Alpha, 913) -HTML_ENTITY(Beta, 914) -HTML_ENTITY(Gamma, 915) -HTML_ENTITY(Delta, 916) -HTML_ENTITY(Epsilon, 917) -HTML_ENTITY(Zeta, 918) -HTML_ENTITY(Eta, 919) -HTML_ENTITY(Theta, 920) -HTML_ENTITY(Iota, 921) -HTML_ENTITY(Kappa, 922) -HTML_ENTITY(Lambda, 923) -HTML_ENTITY(Mu, 924) -HTML_ENTITY(Nu, 925) -HTML_ENTITY(Xi, 926) -HTML_ENTITY(Omicron, 927) -HTML_ENTITY(Pi, 928) -HTML_ENTITY(Rho, 929) -HTML_ENTITY(Sigma, 931) -HTML_ENTITY(Tau, 932) -HTML_ENTITY(Upsilon, 933) -HTML_ENTITY(Phi, 934) -HTML_ENTITY(Chi, 935) -HTML_ENTITY(Psi, 936) 
-HTML_ENTITY(Omega, 937) -HTML_ENTITY(alpha, 945) -HTML_ENTITY(beta, 946) -HTML_ENTITY(gamma, 947) -HTML_ENTITY(delta, 948) -HTML_ENTITY(epsilon, 949) -HTML_ENTITY(zeta, 950) -HTML_ENTITY(eta, 951) -HTML_ENTITY(theta, 952) -HTML_ENTITY(iota, 953) -HTML_ENTITY(kappa, 954) -HTML_ENTITY(lambda, 955) -HTML_ENTITY(mu, 956) -HTML_ENTITY(nu, 957) -HTML_ENTITY(xi, 958) -HTML_ENTITY(omicron, 959) -HTML_ENTITY(pi, 960) -HTML_ENTITY(rho, 961) -HTML_ENTITY(sigmaf, 962) -HTML_ENTITY(sigma, 963) -HTML_ENTITY(tau, 964) -HTML_ENTITY(upsilon, 965) -HTML_ENTITY(phi, 966) -HTML_ENTITY(chi, 967) -HTML_ENTITY(psi, 968) -HTML_ENTITY(omega, 969) -HTML_ENTITY(thetasym, 977) -HTML_ENTITY(upsih, 978) -HTML_ENTITY(piv, 982) -HTML_ENTITY(bull, 8226) -HTML_ENTITY(hellip, 8230) -HTML_ENTITY(prime, 8242) -HTML_ENTITY(Prime, 8243) -HTML_ENTITY(oline, 8254) -HTML_ENTITY(frasl, 8260) -HTML_ENTITY(weierp, 8472) -HTML_ENTITY(image, 8465) -HTML_ENTITY(real, 8476) -HTML_ENTITY(trade, 8482) -HTML_ENTITY(alefsym, 8501) -HTML_ENTITY(larr, 8592) -HTML_ENTITY(uarr, 8593) -HTML_ENTITY(rarr, 8594) -HTML_ENTITY(darr, 8595) -HTML_ENTITY(harr, 8596) -HTML_ENTITY(crarr, 8629) -HTML_ENTITY(lArr, 8656) -HTML_ENTITY(uArr, 8657) -HTML_ENTITY(rArr, 8658) -HTML_ENTITY(dArr, 8659) -HTML_ENTITY(hArr, 8660) -HTML_ENTITY(forall, 8704) -HTML_ENTITY(part, 8706) -HTML_ENTITY(exist, 8707) -HTML_ENTITY(empty, 8709) -HTML_ENTITY(nabla, 8711) -HTML_ENTITY(isin, 8712) -HTML_ENTITY(notin, 8713) -HTML_ENTITY(ni, 8715) -HTML_ENTITY(prod, 8719) -HTML_ENTITY(sum, 8721) -HTML_ENTITY(minus, 8722) -HTML_ENTITY(lowast, 8727) -HTML_ENTITY(radic, 8730) -HTML_ENTITY(prop, 8733) -HTML_ENTITY(infin, 8734) -HTML_ENTITY(ang, 8736) -HTML_ENTITY(and, 8743) -HTML_ENTITY(or, 8744) -HTML_ENTITY(cap, 8745) -HTML_ENTITY(cup, 8746) -HTML_ENTITY(int, 8747) -HTML_ENTITY(there4, 8756) -HTML_ENTITY(sim, 8764) -HTML_ENTITY(cong, 8773) -HTML_ENTITY(asymp, 8776) -HTML_ENTITY(ne, 8800) -HTML_ENTITY(equiv, 8801) -HTML_ENTITY(le, 8804) -HTML_ENTITY(ge, 8805) 
-HTML_ENTITY(sub, 8834) -HTML_ENTITY(sup, 8835) -HTML_ENTITY(nsub, 8836) -HTML_ENTITY(sube, 8838) -HTML_ENTITY(supe, 8839) -HTML_ENTITY(oplus, 8853) -HTML_ENTITY(otimes, 8855) -HTML_ENTITY(perp, 8869) -HTML_ENTITY(sdot, 8901) -HTML_ENTITY(lceil, 8968) -HTML_ENTITY(rceil, 8969) -HTML_ENTITY(lfloor, 8970) -HTML_ENTITY(rfloor, 8971) -HTML_ENTITY(lang, 9001) -HTML_ENTITY(rang, 9002) -HTML_ENTITY(loz, 9674) -HTML_ENTITY(spades, 9824) -HTML_ENTITY(clubs, 9827) -HTML_ENTITY(hearts, 9829) -HTML_ENTITY(diams, 9830) - -// Markup-significant and internationalization characters -// See the HTML4.0 spec for this list in it's DTD form -HTML_ENTITY(quot, 34) -HTML_ENTITY(amp, 38) -HTML_ENTITY(lt, 60) -HTML_ENTITY(gt, 62) -HTML_ENTITY(OElig, 338) -HTML_ENTITY(oelig, 339) -HTML_ENTITY(Scaron, 352) -HTML_ENTITY(scaron, 353) -HTML_ENTITY(Yuml, 376) -HTML_ENTITY(circ, 710) -HTML_ENTITY(tilde, 732) -HTML_ENTITY(ensp, 8194) -HTML_ENTITY(emsp, 8195) -HTML_ENTITY(thinsp, 8201) -HTML_ENTITY(zwnj, 8204) -HTML_ENTITY(zwj, 8205) -HTML_ENTITY(lrm, 8206) -HTML_ENTITY(rlm, 8207) -HTML_ENTITY(ndash, 8211) -HTML_ENTITY(mdash, 8212) -HTML_ENTITY(lsquo, 8216) -HTML_ENTITY(rsquo, 8217) -HTML_ENTITY(sbquo, 8218) -HTML_ENTITY(ldquo, 8220) -HTML_ENTITY(rdquo, 8221) -HTML_ENTITY(bdquo, 8222) -HTML_ENTITY(dagger, 8224) -HTML_ENTITY(Dagger, 8225) -HTML_ENTITY(permil, 8240) -HTML_ENTITY(lsaquo, 8249) -HTML_ENTITY(rsaquo, 8250) -HTML_ENTITY(euro, 8364) - -// Navigator entity extensions -// This block of entities needs to be at the bottom of the list since it -// contains duplicate Unicode codepoints. The codepoint to entity name -// mapping (used by Composer) must ignores them, which occurs only -// because they are listed later. - -// apos is from XML -HTML_ENTITY(apos, 39) -// The capitalized versions are required to handle non-standard input. 
-HTML_ENTITY(AMP, 38) -HTML_ENTITY(COPY, 169) -HTML_ENTITY(GT, 62) -HTML_ENTITY(LT, 60) -HTML_ENTITY(QUOT, 34) -HTML_ENTITY(REG, 174) - diff --git a/htmlparser/src/nsHTMLTags.cpp b/htmlparser/src/nsHTMLTags.cpp deleted file mode 100644 index cac9ab4db859..000000000000 --- a/htmlparser/src/nsHTMLTags.cpp +++ /dev/null @@ -1,528 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. 
If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "nsHTMLTags.h" -#include "nsCRT.h" -#include "nsReadableUtils.h" -#include "plhash.h" -#include "nsString.h" -#include "nsStaticAtom.h" - -// C++ sucks! There's no way to do this with a macro, at least not -// that I know, if you know how to do this with a macro then please do -// so... -static const PRUnichar sHTMLTagUnicodeName_a[] = - {'a', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_abbr[] = - {'a', 'b', 'b', 'r', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_acronym[] = - {'a', 'c', 'r', 'o', 'n', 'y', 'm', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_address[] = - {'a', 'd', 'd', 'r', 'e', 's', 's', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_applet[] = - {'a', 'p', 'p', 'l', 'e', 't', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_area[] = - {'a', 'r', 'e', 'a', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_b[] = - {'b', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_base[] = - {'b', 'a', 's', 'e', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_basefont[] = - {'b', 'a', 's', 'e', 'f', 'o', 'n', 't', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_bdo[] = - {'b', 'd', 'o', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_bgsound[] = - {'b', 'g', 's', 'o', 'u', 'n', 'd', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_big[] = - {'b', 'i', 'g', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_blink[] = - {'b', 'l', 'i', 'n', 'k', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_blockquote[] = - {'b', 'l', 'o', 'c', 'k', 'q', 'u', 'o', 't', 'e', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_body[] = - {'b', 'o', 'd', 'y', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_br[] = - {'b', 'r', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_button[] = - {'b', 'u', 't', 't', 'o', 'n', '\0'}; 
-static const PRUnichar sHTMLTagUnicodeName_caption[] = - {'c', 'a', 'p', 't', 'i', 'o', 'n', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_center[] = - {'c', 'e', 'n', 't', 'e', 'r', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_cite[] = - {'c', 'i', 't', 'e', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_code[] = - {'c', 'o', 'd', 'e', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_col[] = - {'c', 'o', 'l', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_colgroup[] = - {'c', 'o', 'l', 'g', 'r', 'o', 'u', 'p', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_counter[] = - {'c', 'o', 'u', 'n', 't', 'e', 'r', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_dd[] = - {'d', 'd', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_del[] = - {'d', 'e', 'l', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_dfn[] = - {'d', 'f', 'n', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_dir[] = - {'d', 'i', 'r', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_div[] = - {'d', 'i', 'v', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_dl[] = - {'d', 'l', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_dt[] = - {'d', 't', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_em[] = - {'e', 'm', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_embed[] = - {'e', 'm', 'b', 'e', 'd', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_endnote[] = - {'e', 'n', 'd', 'n', 'o', 't', 'e', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_fieldset[] = - {'f', 'i', 'e', 'l', 'd', 's', 'e', 't', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_font[] = - {'f', 'o', 'n', 't', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_form[] = - {'f', 'o', 'r', 'm', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_frame[] = - {'f', 'r', 'a', 'm', 'e', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_frameset[] = - {'f', 'r', 'a', 'm', 'e', 's', 'e', 't', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_h1[] = - {'h', '1', '\0'}; -static const PRUnichar 
sHTMLTagUnicodeName_h2[] = - {'h', '2', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_h3[] = - {'h', '3', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_h4[] = - {'h', '4', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_h5[] = - {'h', '5', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_h6[] = - {'h', '6', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_head[] = - {'h', 'e', 'a', 'd', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_hr[] = - {'h', 'r', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_html[] = - {'h', 't', 'm', 'l', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_i[] = - {'i', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_iframe[] = - {'i', 'f', 'r', 'a', 'm', 'e', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_image[] = - {'i', 'm', 'a', 'g', 'e', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_img[] = - {'i', 'm', 'g', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_input[] = - {'i', 'n', 'p', 'u', 't', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_ins[] = - {'i', 'n', 's', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_isindex[] = - {'i', 's', 'i', 'n', 'd', 'e', 'x', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_kbd[] = - {'k', 'b', 'd', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_keygen[] = - {'k', 'e', 'y', 'g', 'e', 'n', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_label[] = - {'l', 'a', 'b', 'e', 'l', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_legend[] = - {'l', 'e', 'g', 'e', 'n', 'd', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_li[] = - {'l', 'i', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_link[] = - {'l', 'i', 'n', 'k', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_listing[] = - {'l', 'i', 's', 't', 'i', 'n', 'g', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_map[] = - {'m', 'a', 'p', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_marquee[] = - {'m', 'a', 'r', 'q', 'u', 'e', 'e', '\0'}; -static const PRUnichar 
sHTMLTagUnicodeName_menu[] = - {'m', 'e', 'n', 'u', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_meta[] = - {'m', 'e', 't', 'a', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_multicol[] = - {'m', 'u', 'l', 't', 'i', 'c', 'o', 'l', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_nobr[] = - {'n', 'o', 'b', 'r', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_noembed[] = - {'n', 'o', 'e', 'm', 'b', 'e', 'd', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_noframes[] = - {'n', 'o', 'f', 'r', 'a', 'm', 'e', 's', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_noscript[] = - {'n', 'o', 's', 'c', 'r', 'i', 'p', 't', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_object[] = - {'o', 'b', 'j', 'e', 'c', 't', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_ol[] = - {'o', 'l', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_optgroup[] = - {'o', 'p', 't', 'g', 'r', 'o', 'u', 'p', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_option[] = - {'o', 'p', 't', 'i', 'o', 'n', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_p[] = - {'p', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_param[] = - {'p', 'a', 'r', 'a', 'm', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_parsererror[] = - {'p', 'a', 'r', 's', 'e', 'r', 'e', 'r', 'r', 'o', 'r', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_plaintext[] = - {'p', 'l', 'a', 'i', 'n', 't', 'e', 'x', 't', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_pre[] = - {'p', 'r', 'e', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_q[] = - {'q', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_s[] = - {'s', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_samp[] = - {'s', 'a', 'm', 'p', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_script[] = - {'s', 'c', 'r', 'i', 'p', 't', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_select[] = - {'s', 'e', 'l', 'e', 'c', 't', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_server[] = - {'s', 'e', 'r', 'v', 'e', 'r', '\0'}; -static const 
PRUnichar sHTMLTagUnicodeName_small[] = - {'s', 'm', 'a', 'l', 'l', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_sound[] = - {'s', 'o', 'u', 'n', 'd', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_sourcetext[] = - {'s', 'o', 'u', 'r', 'c', 'e', 't', 'e', 'x', 't', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_spacer[] = - {'s', 'p', 'a', 'c', 'e', 'r', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_span[] = - {'s', 'p', 'a', 'n', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_strike[] = - {'s', 't', 'r', 'i', 'k', 'e', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_strong[] = - {'s', 't', 'r', 'o', 'n', 'g', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_style[] = - {'s', 't', 'y', 'l', 'e', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_sub[] = - {'s', 'u', 'b', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_sup[] = - {'s', 'u', 'p', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_table[] = - {'t', 'a', 'b', 'l', 'e', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_tbody[] = - {'t', 'b', 'o', 'd', 'y', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_td[] = - {'t', 'd', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_textarea[] = - {'t', 'e', 'x', 't', 'a', 'r', 'e', 'a', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_tfoot[] = - {'t', 'f', 'o', 'o', 't', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_th[] = - {'t', 'h', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_thead[] = - {'t', 'h', 'e', 'a', 'd', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_title[] = - {'t', 'i', 't', 'l', 'e', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_tr[] = - {'t', 'r', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_tt[] = - {'t', 't', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_u[] = - {'u', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_ul[] = - {'u', 'l', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_var[] = - {'v', 'a', 'r', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_wbr[] = - 
{'w', 'b', 'r', '\0'}; -static const PRUnichar sHTMLTagUnicodeName_xmp[] = - {'x', 'm', 'p', '\0'}; - -// static array of unicode tag names -#define HTML_TAG(_tag, _classname) sHTMLTagUnicodeName_##_tag, -#define HTML_OTHER(_tag, _classname) -static const PRUnichar* const kTagUnicodeTable[] = { -#include "nsHTMLTagList.h" -}; -#undef HTML_TAG - -// static array of tag atoms -static nsIAtom* kTagAtomTable[eHTMLTag_userdefined - 1]; - -// static array of tag StaticAtom structs -#define HTML_TAG(_tag, _classname) { #_tag, &kTagAtomTable[eHTMLTag_##_tag - 1] }, -static const nsStaticAtom kTagAtoms_info[] = { -#include "nsHTMLTagList.h" -}; -#undef HTML_TAG -#undef HTML_OTHER - -static PRInt32 gTableRefCount; -static PLHashTable* gTagTable; - - -PR_STATIC_CALLBACK(PLHashNumber) -HTMLTagsHashCodeUCPtr(const void *key) -{ - const PRUnichar *str = (const PRUnichar *)key; - - return nsCRT::HashCode(str); -} - -PR_STATIC_CALLBACK(PRIntn) -HTMLTagsKeyCompareUCPtr(const void *key1, const void *key2) -{ - const PRUnichar *str1 = (const PRUnichar *)key1; - const PRUnichar *str2 = (const PRUnichar *)key2; - - return nsCRT::strcmp(str1, str2) == 0; -} - - -static PRUint32 sMaxTagNameLength; -#define NS_HTMLTAG_NAME_MAX_LENGTH 11 - -// static -nsresult -nsHTMLTags::AddRefTable(void) -{ - if (gTableRefCount++ == 0) { - NS_ASSERTION(!gTagTable, "pre existing hash!"); - - gTagTable = PL_NewHashTable(64, HTMLTagsHashCodeUCPtr, - HTMLTagsKeyCompareUCPtr, PL_CompareValues, - nsnull, nsnull); - NS_ENSURE_TRUE(gTagTable, NS_ERROR_OUT_OF_MEMORY); - - // Fill in gTagTable with the above static PRUnichar strings as - // keys and the value of the corresponding enum as the value in - // the table. 
- - PRInt32 i; - for (i = 0; i < NS_HTML_TAG_MAX; ++i) { - PRUint32 len = nsCRT::strlen(kTagUnicodeTable[i]); - - PL_HashTableAdd(gTagTable, kTagUnicodeTable[i], - NS_INT32_TO_PTR(i + 1)); - - if (len > sMaxTagNameLength) { - sMaxTagNameLength = len; - } - } - - NS_ASSERTION(sMaxTagNameLength == NS_HTMLTAG_NAME_MAX_LENGTH, - "NS_HTMLTAG_NAME_MAX_LENGTH not set correctly!"); - - // Fill in our static atom pointers - NS_RegisterStaticAtoms(kTagAtoms_info, NS_ARRAY_LENGTH(kTagAtoms_info)); - -#ifdef DEBUG - { - // let's verify that all names in the the table are lowercase... - for (i = 0; i < NS_HTML_TAG_MAX; ++i) { - nsCAutoString temp1(kTagAtoms_info[i].mString); - nsCAutoString temp2(kTagAtoms_info[i].mString); - ToLowerCase(temp1); - NS_ASSERTION(temp1.Equals(temp2), "upper case char in table"); - } - - // let's verify that all names in the unicode strings above are - // correct. - for (i = 0; i < NS_HTML_TAG_MAX; ++i) { - nsAutoString temp1(kTagUnicodeTable[i]); - nsAutoString temp2; temp2.AssignWithConversion(kTagAtoms_info[i].mString); - NS_ASSERTION(temp1.Equals(temp2), "Bad unicode tag name!"); - } - } -#endif - } - - return NS_OK; -} - -// static -void -nsHTMLTags::ReleaseTable(void) -{ - if (0 == --gTableRefCount) { - if (gTagTable) { - // Nothing to delete/free in this table, just destroy the table. 
- - PL_HashTableDestroy(gTagTable); - - gTagTable = nsnull; - } - } -} - -// static -nsHTMLTag -nsHTMLTags::CaseSensitiveLookupTag(const PRUnichar* aTagName) -{ - NS_ASSERTION(gTagTable, "no lookup table, needs addref"); - NS_ASSERTION(aTagName, "null tagname!"); - - PRUint32 tag = NS_PTR_TO_INT32(PL_HashTableLookupConst(gTagTable, aTagName)); - - return (nsHTMLTag)tag; -} - -// static -nsHTMLTag -nsHTMLTags::LookupTag(const nsAString& aTagName) -{ - PRUint32 length = aTagName.Length(); - - if (length > sMaxTagNameLength) { - return eHTMLTag_userdefined; - } - - static PRUnichar buf[NS_HTMLTAG_NAME_MAX_LENGTH + 1]; - - nsAString::const_iterator iter; - PRUint32 i = 0; - PRUnichar c; - - aTagName.BeginReading(iter); - - // Fast lowercasing-while-copying of ASCII characters into a - // PRUnichar buffer - - while (i < length) { - c = *iter; - - if (c <= 'Z' && c >= 'A') { - c |= 0x20; // Lowercase the ASCII character. - } - - buf[i] = c; // Copy ASCII character. - - ++i; - ++iter; - } - - buf[i] = 0; - - nsHTMLTag tag = CaseSensitiveLookupTag(buf); - - // hack: this can come out when rickg provides a way for the editor to ask - // CanContain() questions without having to first fetch the parsers - // internal enum values for a tag name. - - // Hmm, this hack would be faster if we'd put these strings in the - // hash table. But maybe it's not worth it... 
- - if (tag == eHTMLTag_unknown) { - // "__moz_text" - static const PRUnichar moz_text[] = - {'_', '_', 'm', 'o', 'z', '_', 't', 'e', 'x', 't', PRUnichar(0) }; - - // "#text" - static const PRUnichar text[] = - {'#', 't', 'e', 'x', 't', PRUnichar(0) }; - - if (nsCRT::strcmp(buf, moz_text) == 0) { - tag = eHTMLTag_text; - } else if (nsCRT::strcmp(buf, text) == 0) { - tag = eHTMLTag_text; - } else { - tag = eHTMLTag_userdefined; - } - } - - return tag; -} - -// static -const PRUnichar * -nsHTMLTags::GetStringValue(nsHTMLTag aEnum) -{ - if (aEnum <= eHTMLTag_unknown || aEnum > NS_HTML_TAG_MAX) { - return nsnull; - } - - return kTagUnicodeTable[aEnum - 1]; -} - -// static -nsIAtom * -nsHTMLTags::GetAtom(nsHTMLTag aEnum) -{ - if (aEnum <= eHTMLTag_unknown || aEnum > NS_HTML_TAG_MAX) { - return nsnull; - } - - return kTagAtomTable[aEnum - 1]; -} - - -#ifdef NS_DEBUG - -// tag table verification class. - -class nsTestTagTable { -public: - nsTestTagTable() { - const PRUnichar *tag; - nsHTMLTag id; - - nsHTMLTags::AddRefTable(); - // Make sure we can find everything we are supposed to - for (int i = 0; i < NS_HTML_TAG_MAX; ++i) { - tag = kTagUnicodeTable[i]; - id = nsHTMLTags::LookupTag(nsDependentString(tag)); - NS_ASSERTION(id != eHTMLTag_userdefined, "can't find tag id"); - const PRUnichar* check = nsHTMLTags::GetStringValue(id); - NS_ASSERTION(0 == nsCRT::strcmp(check, tag), "can't map id back to tag"); - } - - // Make sure we don't find things that aren't there - id = nsHTMLTags::LookupTag(NS_LITERAL_STRING("@")); - NS_ASSERTION(id == eHTMLTag_userdefined, "found @"); - id = nsHTMLTags::LookupTag(NS_LITERAL_STRING("zzzzz")); - NS_ASSERTION(id == eHTMLTag_userdefined, "found zzzzz"); - - tag = nsHTMLTags::GetStringValue((nsHTMLTag) 0); - NS_ASSERTION(!tag, "found enum 0"); - tag = nsHTMLTags::GetStringValue((nsHTMLTag) -1); - NS_ASSERTION(!tag, "found enum -1"); - tag = nsHTMLTags::GetStringValue((nsHTMLTag) (NS_HTML_TAG_MAX + 1)); - NS_ASSERTION(!tag, "found past max 
enum"); - - nsHTMLTags::ReleaseTable(); - } -}; - -static const nsTestTagTable validateTagTable; - -#endif diff --git a/htmlparser/src/nsHTMLTokenizer.cpp b/htmlparser/src/nsHTMLTokenizer.cpp deleted file mode 100644 index 8ea43b99ca12..000000000000 --- a/htmlparser/src/nsHTMLTokenizer.cpp +++ /dev/null @@ -1,1080 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. 
If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - - -/** - * MODULE NOTES: - * @update gess 4/1/98 - * - */ - -#include "nsIAtom.h" -#include "nsHTMLTokenizer.h" -#include "nsScanner.h" -#include "nsElementTable.h" -#include "CParserContext.h" -#include "nsReadableUtils.h" -#include "nsUnicharUtils.h" - -/************************************************************************ - And now for the main class -- nsHTMLTokenizer... - ************************************************************************/ - -static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); -static NS_DEFINE_IID(kITokenizerIID, NS_ITOKENIZER_IID); -static NS_DEFINE_IID(kClassIID, NS_HTMLTOKENIZER_IID); - -/** - * This method gets called as part of our COM-like interfaces. - * Its purpose is to create an interface to parser object - * of some type. - * - * @update gess 4/8/98 - * @param nsIID id of object to discover - * @param aInstancePtr ptr to newly discovered interface - * @return NS_xxx result code - */ -nsresult nsHTMLTokenizer::QueryInterface(const nsIID& aIID, void** aInstancePtr) -{ - if (NULL == aInstancePtr) { - return NS_ERROR_NULL_POINTER; - } - - if(aIID.Equals(kISupportsIID)) { //do IUnknown... - *aInstancePtr = (nsISupports*)(this); - } - else if(aIID.Equals(kITokenizerIID)) { //do IParser base class... - *aInstancePtr = (nsITokenizer*)(this); - } - else if(aIID.Equals(kClassIID)) { //do this class... - *aInstancePtr = (nsHTMLTokenizer*)(this); - } - else { - *aInstancePtr=0; - return NS_NOINTERFACE; - } - NS_ADDREF_THIS(); - return NS_OK; -} - -/** - * This method is defined in nsHTMLTokenizer.h. It is used to - * cause the COM-like construction of an HTMLTokenizer. 
- * - * @update gess 4/8/98 - * @param nsIParser** ptr to newly instantiated parser - * @return NS_xxx error result - */ - -nsresult NS_NewHTMLTokenizer(nsITokenizer** aInstancePtrResult, - PRInt32 aFlag, - eParserDocType aDocType, - eParserCommands aCommand) -{ - NS_PRECONDITION(nsnull != aInstancePtrResult, "null ptr"); - if (nsnull == aInstancePtrResult) { - return NS_ERROR_NULL_POINTER; - } - nsHTMLTokenizer* it = new nsHTMLTokenizer(aFlag,aDocType,aCommand); - if (nsnull == it) { - return NS_ERROR_OUT_OF_MEMORY; - } - return it->QueryInterface(kClassIID, (void **) aInstancePtrResult); -} - - -NS_IMPL_ADDREF(nsHTMLTokenizer) -NS_IMPL_RELEASE(nsHTMLTokenizer) - - -/** - * Default constructor - * - * @update gess 4/9/98 - * @param - * @return - */ - nsHTMLTokenizer::nsHTMLTokenizer(PRInt32 aParseMode, - eParserDocType aDocType, - eParserCommands aCommand) : - nsITokenizer(), mTokenDeque(0) -{ - if (aParseMode==eDTDMode_full_standards || - aParseMode==eDTDMode_almost_standards) { - mFlags = NS_IPARSER_FLAG_STRICT_MODE; - } - else if (aParseMode==eDTDMode_quirks) { - mFlags = NS_IPARSER_FLAG_QUIRKS_MODE; - } - else if (aParseMode==eDTDMode_autodetect) { - mFlags = NS_IPARSER_FLAG_AUTO_DETECT_MODE; - } - else { - mFlags = NS_IPARSER_FLAG_UNKNOWN_MODE; - } - - if (aDocType==ePlainText) { - mFlags |= NS_IPARSER_FLAG_PLAIN_TEXT; - } - else if (aDocType==eXML) { - mFlags |= NS_IPARSER_FLAG_XML; - } - else if (aDocType==eHTML_Quirks || - aDocType==eHTML3_Quirks || - aDocType==eHTML_Strict) { - mFlags |= NS_IPARSER_FLAG_HTML; - } - - mFlags |= (aCommand==eViewSource)? 
NS_IPARSER_FLAG_VIEW_SOURCE:NS_IPARSER_FLAG_VIEW_NORMAL; - - mTokenAllocator = nsnull; - mTokenScanPos = 0; - mPreserveTarget = eHTMLTag_unknown; -} - - -/** - * Destructor - * - * @update gess 4/9/98 - * @param - * @return - */ -nsHTMLTokenizer::~nsHTMLTokenizer(){ - if(mTokenDeque.GetSize()){ - CTokenDeallocator theDeallocator(mTokenAllocator->GetArenaPool()); - mTokenDeque.ForEach(theDeallocator); - } -} - - -/******************************************************************* - Here begins the real working methods for the tokenizer. - *******************************************************************/ - -void nsHTMLTokenizer::AddToken(CToken*& aToken,nsresult aResult,nsDeque* aDeque,nsTokenAllocator* aTokenAllocator) { - if(aToken && aDeque) { - if(NS_SUCCEEDED(aResult)) { - aDeque->Push(aToken); - } - else { - IF_FREE(aToken, aTokenAllocator); - } - } -} - -/** - * Retrieve a ptr to the global token recycler... - * @update gess8/4/98 - * @return ptr to recycler (or null) - */ -nsTokenAllocator* nsHTMLTokenizer::GetTokenAllocator(void) { - return mTokenAllocator; -} - - -/** - * This method provides access to the topmost token in the tokenDeque. - * The token is not really removed from the list. - * @update gess8/2/98 - * @return ptr to token - */ -CToken* nsHTMLTokenizer::PeekToken() { - return (CToken*)mTokenDeque.PeekFront(); -} - - -/** - * This method provides access to the topmost token in the tokenDeque. - * The token is really removed from the list; if the list is empty we return 0. 
- * @update gess8/2/98 - * @return ptr to token or NULL - */ -CToken* nsHTMLTokenizer::PopToken() { - CToken* result=nsnull; - result=(CToken*)mTokenDeque.PopFront(); - return result; -} - - -/** - * - * @update gess8/2/98 - * @param - * @return - */ -CToken* nsHTMLTokenizer::PushTokenFront(CToken* theToken) { - mTokenDeque.PushFront(theToken); - return theToken; -} - -/** - * - * @update gess8/2/98 - * @param - * @return - */ -CToken* nsHTMLTokenizer::PushToken(CToken* theToken) { - mTokenDeque.Push(theToken); - return theToken; -} - -/** - * - * @update gess12/29/98 - * @param - * @return - */ -PRInt32 nsHTMLTokenizer::GetCount(void) { - return mTokenDeque.GetSize(); -} - -/** - * - * @update gess12/29/98 - * @param - * @return - */ -CToken* nsHTMLTokenizer::GetTokenAt(PRInt32 anIndex){ - return (CToken*)mTokenDeque.ObjectAt(anIndex); -} - -/** - * @update gess 12/29/98 - * @update harishd 08/04/00 - * @param - * @return - */ -nsresult nsHTMLTokenizer::WillTokenize(PRBool aIsFinalChunk,nsTokenAllocator* aTokenAllocator) -{ - mTokenAllocator=aTokenAllocator; - mIsFinalChunk=aIsFinalChunk; - mTokenScanPos=mTokenDeque.GetSize(); //cause scanDocStructure to search from here for new tokens... - return NS_OK; -} - -/** - * - * @update gess12/29/98 - * @param - * @return - */ -void nsHTMLTokenizer::PrependTokens(nsDeque& aDeque){ - - PRInt32 aCount=aDeque.GetSize(); - - //last but not least, let's check the misplaced content list. - //if we find it, then we have to push it all into the body before continuing... 
- PRInt32 anIndex=0; - for(anIndex=0;anIndex<aCount;++anIndex){ - CToken* theToken=(CToken*)aDeque.Pop(); - PushTokenFront(theToken); - } - -} - -NS_IMETHODIMP -nsHTMLTokenizer::CopyState(nsITokenizer* aTokenizer) -{ - if (aTokenizer) { - mFlags &= ~NS_IPARSER_FLAG_PRESERVE_CONTENT; - mPreserveTarget = - NS_STATIC_CAST(nsHTMLTokenizer*, aTokenizer)->mPreserveTarget; - if (mPreserveTarget != eHTMLTag_unknown) - mFlags |= NS_IPARSER_FLAG_PRESERVE_CONTENT; - } - return NS_OK; -} - -/** - * This is a utilty method for ScanDocStructure, which finds a given - * tag in the stack. - * - * @update gess 08/30/00 - * @param aTag -- the ID of the tag we're seeking - * @param aTagStack -- the stack to be searched - * @return index pos of tag in stack if found, otherwise kNotFound - */ -static PRInt32 FindLastIndexOfTag(eHTMLTags aTag,nsDeque &aTagStack) { - PRInt32 theCount=aTagStack.GetSize(); - - while(0<theCount) { - CHTMLToken *theToken=(CHTMLToken*)aTagStack.ObjectAt(--theCount); - if(theToken) { - eHTMLTags theTag=(eHTMLTags)theToken->GetTypeID(); - if(theTag==aTag) { - return theCount; - } - } - } - - return kNotFound; -} - -/** - * This method scans the sequence of tokens to determine the - * well formedness of each tag structure. This is used to - * disable residual-style handling in well formed cases. - * - * @update gess 1Sep2000 - * @param - * @return - */ -nsresult nsHTMLTokenizer::ScanDocStructure(PRBool aFinalChunk) { - nsresult result=NS_OK; - if (!mTokenDeque.GetSize()) - return result; - - CHTMLToken *theRootToken=0; - - //*** start by finding the first start tag that hasn't been reviewed. 
- - while(mTokenScanPos>0) { - theRootToken=(CHTMLToken*)mTokenDeque.ObjectAt(mTokenScanPos); - if(theRootToken) { - eHTMLTokenTypes theType=eHTMLTokenTypes(theRootToken->GetTokenType()); - if(eToken_start==theType) { - if(eFormUnknown==theRootToken->GetContainerInfo()) { - break; - } - } - } - mTokenScanPos--; - } - - /*---------------------------------------------------------------------- - * Now that we know where to start, let's walk through the - * tokens to see which are well-formed. Stop when you run out - * of fresh tokens. - *---------------------------------------------------------------------*/ - - theRootToken=(CHTMLToken*)mTokenDeque.ObjectAt(mTokenScanPos); //init to root - - nsDeque theStack(0); - eHTMLTags theRootTag=eHTMLTag_unknown; - CHTMLToken *theToken=theRootToken; //init to root - PRInt32 theStackDepth=0; - - static const PRInt32 theMaxStackDepth=200; //dont bother if we get ridiculously deep. - - while(theToken && (theStackDepth<theMaxStackDepth)) { - - eHTMLTokenTypes theType=eHTMLTokenTypes(theToken->GetTokenType()); - eHTMLTags theTag=(eHTMLTags)theToken->GetTypeID(); - - PRBool theTagIsContainer=nsHTMLElement::IsContainer(theTag); //bug54117... - - if(theTagIsContainer) { - PRBool theTagIsBlock=gHTMLElements[theTag].IsMemberOf(kBlockEntity); - PRBool theTagIsInline= (theTagIsBlock) ? PR_FALSE : gHTMLElements[theTag].IsMemberOf(kInlineEntity); - - if(theTagIsBlock || theTagIsInline || (eHTMLTag_table==theTag)) { - - switch(theType) { - - case eToken_start: - if(0==theStack.GetSize()) { - //track the tag on the top of the stack... 
- theRootToken=theToken; - theRootTag=theTag; - } - theStack.Push(theToken); - ++theStackDepth; - break; - - case eToken_end: - { - CHTMLToken *theLastToken= NS_STATIC_CAST(CHTMLToken*, theStack.Peek()); - if(theLastToken) { - if(theTag==theLastToken->GetTypeID()) { - theStack.Pop(); //yank it for real - theStackDepth--; - theLastToken->SetContainerInfo(eWellFormed); - - //in addition, let's look above this container to see if we can find - //any tags that are already marked malformed. If so, pop them too! - - theLastToken= NS_STATIC_CAST(CHTMLToken*, theStack.Peek()); - while(theLastToken) { - if(eMalformed==theRootToken->GetContainerInfo()) { - theStack.Pop(); //yank the malformed token for real. - theLastToken= NS_STATIC_CAST(CHTMLToken*, theStack.Peek()); - continue; - } - break; - } - } - else { - //the topmost token isn't what we expected, so that container must - //be malformed. If the tag is a block, we don't really care (but we'll - //mark it anyway). If it's an inline we DO care, especially if the - //inline tried to contain a block (that's when RS handling kicks in). - if(theTagIsInline) { - PRInt32 theIndex=FindLastIndexOfTag(theTag,theStack); - if(kNotFound!=theIndex) { - theToken=(CHTMLToken*)theStack.ObjectAt(theIndex); - theToken->SetContainerInfo(eMalformed); - } - //otherwise we ignore an out-of-place end tag. - } - else { - } - } - } - } - break; - - default: - break; - } //switch - - } - } - - theToken=(CHTMLToken*)mTokenDeque.ObjectAt(++mTokenScanPos); - } - - return result; -} - -nsresult nsHTMLTokenizer::DidTokenize(PRBool aFinalChunk) { - return ScanDocStructure(aFinalChunk); -} - -/** - * This method repeatedly called by the tokenizer. - * Each time, we determine the kind of token were about to - * read, and then we call the appropriate method to handle - * that token type. 
- * - * @update gess 3/25/98 - * @param aChar: last char read - * @param aScanner: see nsScanner.h - * @param anErrorCode: arg that will hold error condition - * @return new token or null - */ -nsresult nsHTMLTokenizer::ConsumeToken(nsScanner& aScanner,PRBool& aFlushTokens) { - - PRUnichar theChar; - CToken* theToken=0; - - nsresult result=aScanner.Peek(theChar); - - switch(result) { - case kEOF: - //We convert from eof to complete here, because we never really tried to get data. - //All we did was try to see if data was available, which it wasn't. - //It's important to return process complete, so that controlling logic can know that - //everything went well, but we're done with token processing. - return result; - - case NS_OK: - default: - - if(!(mFlags & NS_IPARSER_FLAG_PLAIN_TEXT)) { - if(kLessThan==theChar) { - return ConsumeTag(theChar,theToken,aScanner,aFlushTokens); - } - else if(kAmpersand==theChar){ - return ConsumeEntity(theChar,theToken,aScanner); - } - } - - if((kCR==theChar) || (kLF==theChar)) { - return ConsumeNewline(theChar,theToken,aScanner); - } - else { - if(!nsCRT::IsAsciiSpace(theChar)) { - if(theChar!=nsnull) { - result=ConsumeText(theToken,aScanner); - } - else { - aScanner.GetChar(theChar); // skip the embedded null char. Fix bug 64098. - } - break; - } - result=ConsumeWhitespace(theChar,theToken,aScanner); - } - break; - } //switch - - return result; -} - - -/** - * This method is called just after a "<" has been consumed - * and we know we're at the start of some kind of tagged - * element. We don't know yet if it's a tag or a comment. - * - * @update gess 5/12/98 - * @param aChar is the last char read - * @param aScanner is represents our input source - * @param aToken is the out arg holding our new token - * @return error code. 
- */ -nsresult nsHTMLTokenizer::ConsumeTag(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner,PRBool& aFlushTokens) { - - PRUnichar theNextChar, oldChar; - nsresult result=aScanner.Peek(aChar,1); - - if(NS_OK==result) { - - switch(aChar) { - case kForwardSlash: - // Get the original "<" (we've already seen it with a Peek) - aScanner.GetChar(oldChar); - - result=aScanner.Peek(theNextChar, 1); - if(NS_OK==result) { - // xml allow non ASCII tag name, consume as end tag. need to make xml view source work - PRBool isXML=(mFlags & NS_IPARSER_FLAG_XML); - if(nsCRT::IsAsciiAlpha(theNextChar)||(kGreaterThan==theNextChar)|| - (isXML && (! nsCRT::IsAscii(theNextChar)))) { - result=ConsumeEndTag(aChar,aToken,aScanner); - } - else result=ConsumeComment(aChar,aToken,aScanner); - }//if - break; - - case kExclamation: - // Get the original "<" (we've already seen it with a Peek) - aScanner.GetChar(oldChar); - - result=aScanner.Peek(theNextChar, 1); - if(NS_OK==result) { - if((kMinus==theNextChar) || (kGreaterThan==theNextChar)) { - result=ConsumeComment(aChar,aToken,aScanner); - } - else - result=ConsumeSpecialMarkup(aChar,aToken,aScanner); - } - break; - - case kQuestionMark: //it must be an XML processing instruction... - // Get the original "<" (we've already seen it with a Peek) - aScanner.GetChar(oldChar); - result=ConsumeProcessingInstruction(aChar,aToken,aScanner); - break; - - default: - if(nsCRT::IsAsciiAlpha(aChar)) { - // Get the original "<" (we've already seen it with a Peek) - aScanner.GetChar(oldChar); - result=ConsumeStartTag(aChar,aToken,aScanner,aFlushTokens); - } - else { - // We are not dealing with a tag. So, don't consume the original - // char and leave the decision to ConsumeText(). - result=ConsumeText(aToken,aScanner); - } - } //switch - - } //if - return result; -} - -/** - * This method is called just after we've consumed a start - * tag, and we now have to consume its attributes. 
- * - * @update rickg 03.23.2000 - * @param aChar: last char read - * @param aScanner: see nsScanner.h - * @param aLeadingWS: contains ws chars that preceeded the first attribute - * @return - */ -nsresult nsHTMLTokenizer::ConsumeAttributes(PRUnichar aChar, - CToken* aToken, - nsScanner& aScanner) { - PRBool done=PR_FALSE; - nsresult result=NS_OK; - PRInt16 theAttrCount=0; - - nsTokenAllocator* theAllocator=this->GetTokenAllocator(); - - while((!done) && (result==NS_OK)) { - CAttributeToken* theToken= NS_STATIC_CAST(CAttributeToken*, theAllocator->CreateTokenOfType(eToken_attribute,eHTMLTag_unknown)); - if(theToken){ - result=theToken->Consume(aChar,aScanner,mFlags); //tell new token to finish consuming text... - - //Much as I hate to do this, here's some special case code. - //This handles the case of empty-tags in XML. Our last - //attribute token will come through with a text value of "" - //and a textkey of "/". We should destroy it, and tell the - //start token it was empty. - if(NS_SUCCEEDED(result)) { - PRBool isUsableAttr = PR_TRUE; - const nsAString& key=theToken->GetKey(); - const nsAString& text=theToken->GetValue(); - - // support XML like syntax to fix bugs like 44186 - if(!key.IsEmpty() && kForwardSlash==key.First() && text.IsEmpty()) { - isUsableAttr = PRBool(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE); // Fix bug 103095 - aToken->SetEmpty(isUsableAttr); - } - if(isUsableAttr) { - ++theAttrCount; - AddToken((CToken*&)theToken,result,&mTokenDeque,theAllocator); - } - else { - IF_FREE(theToken, mTokenAllocator); - } - } - else { //if(NS_ERROR_HTMLPARSER_BADATTRIBUTE==result){ - aToken->SetEmpty(PR_TRUE); - IF_FREE(theToken, mTokenAllocator); - if(NS_ERROR_HTMLPARSER_BADATTRIBUTE==result) - result=NS_OK; - } - }//if - -#ifdef DEBUG - if(NS_SUCCEEDED(result)){ - PRInt32 newline = 0; - result = aScanner.SkipWhitespace(newline); - NS_ASSERTION(newline == 0, "CAttribute::Consume() failed to collect all the newlines!"); - } -#endif - if (NS_SUCCEEDED(result)) { - 
result = aScanner.Peek(aChar); - if (NS_SUCCEEDED(result)) { - if (aChar == kGreaterThan) { //you just ate the '>' - aScanner.GetChar(aChar); //skip the '>' - done = PR_TRUE; - } - else if(aChar == kLessThan) { - done = PR_TRUE; - } - }//if - }//if - }//while - - aToken->SetAttributeCount(theAttrCount); - return result; -} - -/** - * - * @update gess12/28/98 - * @param - * @return - */ -nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner,PRBool& aFlushTokens) { - PRInt32 theDequeSize=mTokenDeque.GetSize(); //remember this for later in case you have to unwind... - nsresult result=NS_OK; - - nsTokenAllocator* theAllocator=this->GetTokenAllocator(); - aToken=theAllocator->CreateTokenOfType(eToken_start,eHTMLTag_unknown); - - if(aToken) { - // Save the position after '<' for use in recording traling contents. Ref: Bug. 15204. - nsScannerIterator origin; - aScanner.CurrentPosition(origin); - - result= aToken->Consume(aChar,aScanner,mFlags); //tell new token to finish consuming text... - - if(NS_SUCCEEDED(result)) { - - AddToken(aToken,result,&mTokenDeque,theAllocator); - NS_ENSURE_SUCCESS(result, result); - - eHTMLTags theTag=(eHTMLTags)aToken->GetTypeID(); - - //Good. Now, let's see if the next char is ">". - //If so, we have a complete tag, otherwise, we have attributes. - result = aScanner.Peek(aChar); - NS_ENSURE_SUCCESS(result, result); - - if(kGreaterThan != aChar) { //look for '>' - result = ConsumeAttributes(aChar, aToken, aScanner); - } //if - else { - aScanner.GetChar(aChar); - } - - /* Now that that's over with, we have one more problem to solve. - In the case that we just read a <SCRIPT> or <STYLE> tags, we should go and - consume all the content itself. - */ - if(NS_SUCCEEDED(result)) { - CStartToken* theStartToken = NS_STATIC_CAST(CStartToken*,aToken); - //XXX - Find a better soution to record content - //Added _plaintext to fix bug 46054. 
- if(!(mFlags & NS_IPARSER_FLAG_PRESERVE_CONTENT) && - (theTag == eHTMLTag_textarea || - theTag == eHTMLTag_xmp || - theTag == eHTMLTag_plaintext || - theTag == eHTMLTag_noscript || - theTag == eHTMLTag_noframes)) { - NS_ASSERTION(mPreserveTarget == eHTMLTag_unknown, - "mPreserveTarget set but not preserving content?"); - mPreserveTarget = theTag; - mFlags |= NS_IPARSER_FLAG_PRESERVE_CONTENT; - } - - if (mFlags & NS_IPARSER_FLAG_PRESERVE_CONTENT) - PreserveToken(theStartToken, aScanner, origin); - - //if((eHTMLTag_style==theTag) || (eHTMLTag_script==theTag)) { - if(gHTMLElements[theTag].CanContainType(kCDATA)) { - nsAutoString endTagName; - endTagName.Assign(nsHTMLTags::GetStringValue(theTag)); - - CToken* text=theAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text); - CTextToken* textToken=NS_STATIC_CAST(CTextToken*,text); - result=textToken->ConsumeUntil(0,theTag!=eHTMLTag_script,aScanner,endTagName,mFlags,aFlushTokens); //tell new token to finish consuming text... - - // Fix bug 44186 - // Support XML like syntax, i.e., <script src="external.js"/> == <script src="external.js"></script> - // Note: if aFlushTokens is TRUE then we have seen an </script> - // We do NOT want to output the end token if we didn't see a - // </script> and have a preserve target. If that happens, then we'd - // be messing up the text inside the <textarea> or <xmp> or whatever - // it is. - if((!(mFlags & NS_IPARSER_FLAG_PRESERVE_CONTENT) && - !theStartToken->IsEmpty()) || aFlushTokens) { - theStartToken->SetEmpty(PR_FALSE); // Setting this would make cases like <script/>d.w("text");</script> work. - CToken* endToken=theAllocator->CreateTokenOfType(eToken_end,theTag,endTagName); - AddToken(text,result,&mTokenDeque,theAllocator); - AddToken(endToken,result,&mTokenDeque,theAllocator); - } - else { - IF_FREE(text, mTokenAllocator); - } - } - } - - //EEEEECCCCKKKK!!! - //This code is confusing, so pay attention. 
- //If you're here, it's because we were in the midst of consuming a start - //tag but ran out of data (not in the stream, but in this *part* of the stream. - //For simplicity, we have to unwind our input. Therefore, we pop and discard - //any new tokens we've cued this round. Later we can get smarter about this. - if(NS_FAILED(result)) { - while(mTokenDeque.GetSize()>theDequeSize) { - CToken* theToken=(CToken*)mTokenDeque.Pop(); - IF_FREE(theToken, mTokenAllocator); - } - } - } //if - else IF_FREE(aToken, mTokenAllocator); - } //if - return result; -} - -/** - * - * @update gess12/28/98 - * @param - * @return - */ -nsresult nsHTMLTokenizer::ConsumeEndTag(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner) { - - // Get the "/" (we've already seen it with a Peek) - aScanner.GetChar(aChar); - - nsTokenAllocator* theAllocator=this->GetTokenAllocator(); - aToken=theAllocator->CreateTokenOfType(eToken_end,eHTMLTag_unknown); - nsresult result=NS_OK; - - if(aToken) { - result= aToken->Consume(aChar,aScanner,mFlags); //tell new token to finish consuming text... - AddToken(aToken,result,&mTokenDeque,theAllocator); - NS_ENSURE_SUCCESS(result, result); - - result = aScanner.Peek(aChar); - NS_ENSURE_SUCCESS(result, result); - - if(kGreaterThan != aChar) { - result = ConsumeAttributes(aChar, aToken, aScanner); - NS_ENSURE_SUCCESS(result, result); - } - else { - aScanner.GetChar(aChar); - } - - if (NS_SUCCEEDED(result)) { - eHTMLTags theTag = (eHTMLTags)aToken->GetTypeID(); - if (mPreserveTarget == theTag) { - // Target reached. Stop preserving content. - mPreserveTarget = eHTMLTag_unknown; - mFlags &= ~NS_IPARSER_FLAG_PRESERVE_CONTENT; - } - } - } //if - return result; -} - -/** - * This method is called just after a "&" has been consumed - * and we know we're at the start of an entity. 
- * - * @update gess 3/25/98 - * @param aChar: last char read - * @param aScanner: see nsScanner.h - * @param anErrorCode: arg that will hold error condition - * @return new token or null - */ -nsresult nsHTMLTokenizer::ConsumeEntity(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner) { - PRUnichar theChar; - nsresult result=aScanner.Peek(theChar, 1); - - nsTokenAllocator* theAllocator=this->GetTokenAllocator(); - if (NS_SUCCEEDED(result)) { - if (nsCRT::IsAsciiAlpha(theChar) || theChar==kHashsign) { - aToken = theAllocator->CreateTokenOfType(eToken_entity,eHTMLTag_entity); - result=aToken->Consume(theChar,aScanner,mFlags); - - if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) { - IF_FREE(aToken, mTokenAllocator); - } - else { - if (mIsFinalChunk && result == kEOF) { - result=NS_OK; //use as much of the entity as you can get. - } - AddToken(aToken,result,&mTokenDeque,theAllocator); - return result; - } - } - // oops, we're actually looking at plain text... - result = ConsumeText(aToken,aScanner); - }//if - return result; -} - - -/** - * This method is called just after whitespace has been - * consumed and we know we're at the start a whitespace run. - * - * @update gess 3/25/98 - * @param aChar: last char read - * @param aScanner: see nsScanner.h - * @param anErrorCode: arg that will hold error condition - * @return new token or null - */ -nsresult nsHTMLTokenizer::ConsumeWhitespace(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner) { - // Get the whitespace character - aScanner.GetChar(aChar); - - nsTokenAllocator* theAllocator=this->GetTokenAllocator(); - aToken = theAllocator->CreateTokenOfType(eToken_whitespace,eHTMLTag_whitespace); - nsresult result=NS_OK; - if(aToken) { - result=aToken->Consume(aChar,aScanner,mFlags); - AddToken(aToken,result,&mTokenDeque,theAllocator); - } - return result; -} - -/** - * This method is called just after a "<!" has been consumed - * and we know we're at the start of a comment. 
- * - * @update gess 3/25/98 - * @param aChar: last char read - * @param aScanner: see nsScanner.h - * @param anErrorCode: arg that will hold error condition - * @return new token or null - */ -nsresult nsHTMLTokenizer::ConsumeComment(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner){ - // Get the "!" - aScanner.GetChar(aChar); - - nsTokenAllocator* theAllocator=this->GetTokenAllocator(); - aToken = theAllocator->CreateTokenOfType(eToken_comment,eHTMLTag_comment); - nsresult result=NS_OK; - if(aToken) { - result=aToken->Consume(aChar,aScanner,mFlags); - AddToken(aToken,result,&mTokenDeque,theAllocator); - } - return result; -} - -/** - * This method is called just after a known text char has - * been consumed and we should read a text run. - * - * @update gess 3/25/98 - * @param aChar: last char read - * @param aScanner: see nsScanner.h - * @param anErrorCode: arg that will hold error condition - * @return new token or null - */ -nsresult nsHTMLTokenizer::ConsumeText(CToken*& aToken,nsScanner& aScanner){ - nsresult result=NS_OK; - nsTokenAllocator* theAllocator=this->GetTokenAllocator(); - CTextToken* theToken = (CTextToken*)theAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text); - if(theToken) { - PRUnichar ch=0; - result=theToken->Consume(ch,aScanner,mFlags); - if(NS_FAILED(result)) { - if(0==theToken->GetTextLength()){ - IF_FREE(aToken, mTokenAllocator); - aToken = nsnull; - } - else result=NS_OK; - } - aToken = theToken; - AddToken(aToken,result,&mTokenDeque,theAllocator); - } - return result; -} - -/** - * This method is called just after a "<!" has been consumed. - * NOTE: Here we might consume DOCTYPE and "special" markups. - * - * - * @update harishd 09/02/99 - * @param aChar: last char read - * @param aScanner: see nsScanner.h - * @param anErrorCode: arg that will hold error condition - * @return new token or null - */ -nsresult nsHTMLTokenizer::ConsumeSpecialMarkup(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner){ - - // Get the "!" 
- aScanner.GetChar(aChar); - - nsresult result=NS_OK; - nsAutoString theBufCopy; - aScanner.Peek(theBufCopy, 20); - ToUpperCase(theBufCopy); - PRInt32 theIndex=theBufCopy.Find("DOCTYPE"); - nsTokenAllocator* theAllocator=this->GetTokenAllocator(); - - if(theIndex==kNotFound) { - if('['==theBufCopy.CharAt(0)) { - aToken = theAllocator->CreateTokenOfType(eToken_cdatasection,eHTMLTag_comment); - } else if (StringBeginsWith(theBufCopy, NS_LITERAL_STRING("ELEMENT")) || - StringBeginsWith(theBufCopy, NS_LITERAL_STRING("ATTLIST")) || - StringBeginsWith(theBufCopy, NS_LITERAL_STRING("ENTITY")) || - StringBeginsWith(theBufCopy, NS_LITERAL_STRING("NOTATION"))) { - aToken = theAllocator->CreateTokenOfType(eToken_markupDecl,eHTMLTag_markupDecl); - } else { - aToken = theAllocator->CreateTokenOfType(eToken_comment,eHTMLTag_comment); - } - } - else - aToken = theAllocator->CreateTokenOfType(eToken_doctypeDecl,eHTMLTag_doctypeDecl); - - if(aToken) { - result=aToken->Consume(aChar,aScanner,mFlags); - AddToken(aToken,result,&mTokenDeque,theAllocator); - } - return result; -} - -/** - * This method is called just after a newline has been consumed. - * - * @update gess 3/25/98 - * @param aChar: last char read - * @param aScanner: see nsScanner.h - * @param aToken is the newly created newline token that is parsing - * @return error code - */ -nsresult nsHTMLTokenizer::ConsumeNewline(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner){ - // Get the newline character - aScanner.GetChar(aChar); - - nsTokenAllocator* theAllocator=this->GetTokenAllocator(); - aToken=theAllocator->CreateTokenOfType(eToken_newline,eHTMLTag_newline); - nsresult result=NS_OK; - if(aToken) { - result=aToken->Consume(aChar,aScanner,mFlags); - AddToken(aToken,result,&mTokenDeque,theAllocator); - } - return result; -} - - -/** - * This method is called just after a ? has been consumed. 
- * - * @update gess 3/25/98 - * @param aChar: last char read - * @param aScanner: see nsScanner.h - * @param aToken is the newly created newline token that is parsing - * @return error code - */ -nsresult nsHTMLTokenizer::ConsumeProcessingInstruction(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner){ - - // Get the "?" - aScanner.GetChar(aChar); - - nsTokenAllocator* theAllocator=this->GetTokenAllocator(); - aToken=theAllocator->CreateTokenOfType(eToken_instruction,eHTMLTag_unknown); - nsresult result=NS_OK; - if(aToken) { - result=aToken->Consume(aChar,aScanner,mFlags); - AddToken(aToken,result,&mTokenDeque,theAllocator); - } - return result; -} - -/** - * This method keeps a copy of contents within the start token. - * The stored content could later be used in displaying TEXTAREA, - * and also in view source. - * - * @update harishd 11/09/99 - * @param aStartToken: The token whose trailing contents are to be recorded - * @param aScanner: see nsScanner.h - * - */ - -void nsHTMLTokenizer::PreserveToken(CStartToken* aStartToken, - nsScanner& aScanner, - nsScannerIterator aOrigin) { - if(aStartToken) { - nsScannerIterator theCurrentPosition; - aScanner.CurrentPosition(theCurrentPosition); - - nsString& trailingContent = aStartToken->mTrailingContent; - PRUint32 oldLength = trailingContent.Length(); - trailingContent.SetLength(oldLength + Distance(aOrigin, theCurrentPosition)); - - nsWritingIterator<PRUnichar> beginWriting; - trailingContent.BeginWriting(beginWriting); - beginWriting.advance(oldLength); - - copy_string( aOrigin, theCurrentPosition, beginWriting ); - } -} diff --git a/htmlparser/src/nsHTMLTokenizer.h b/htmlparser/src/nsHTMLTokenizer.h deleted file mode 100644 index 7cb8a4169417..000000000000 --- a/htmlparser/src/nsHTMLTokenizer.h +++ /dev/null @@ -1,114 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file 
are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - - -/** - * MODULE NOTES: - * @update gess 4/1/98 - * - */ - -#ifndef __NSHTMLTOKENIZER -#define __NSHTMLTOKENIZER - -#include "nsISupports.h" -#include "nsITokenizer.h" -#include "nsIDTD.h" -#include "prtypes.h" -#include "nsDeque.h" -#include "nsScanner.h" -#include "nsHTMLTokens.h" -#include "nsDTDUtils.h" - -#define NS_HTMLTOKENIZER_IID \ - {0xe4238ddd, 0x9eb6, 0x11d2, \ - {0xba, 0xa5, 0x0, 0x10, 0x4b, 0x98, 0x3f, 0xd4 }} - - -/*************************************************************** - Notes: - ***************************************************************/ - -#ifdef _MSC_VER -#pragma warning( disable : 4275 ) -#endif - -class nsHTMLTokenizer : public nsITokenizer { -public: - - NS_DECL_ISUPPORTS - NS_DECL_NSITOKENIZER - nsHTMLTokenizer(PRInt32 aParseMode = eDTDMode_quirks, - eParserDocType aDocType = eHTML3_Quirks, - eParserCommands aCommand = eViewNormal); - virtual ~nsHTMLTokenizer(); - -protected: - - virtual nsresult ConsumeTag(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner,PRBool& aFlushTokens); - virtual nsresult ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner,PRBool& aFlushTokens); - virtual nsresult ConsumeEndTag(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner); - virtual nsresult ConsumeAttributes(PRUnichar aChar, CToken* aToken, nsScanner& aScanner); - virtual nsresult ConsumeEntity(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner); - virtual nsresult ConsumeWhitespace(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner); - virtual nsresult ConsumeComment(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner); - virtual nsresult ConsumeNewline(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner); - virtual nsresult ConsumeText(CToken*& aToken,nsScanner& aScanner); - virtual nsresult ConsumeSpecialMarkup(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner); - virtual nsresult ConsumeProcessingInstruction(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner); - - nsresult 
ScanDocStructure(PRBool aIsFinalChunk); - - virtual void PreserveToken(CStartToken* aStartToken, nsScanner& aScanner, nsScannerIterator aOrigin); - - static void AddToken(CToken*& aToken,nsresult aResult,nsDeque* aDeque,nsTokenAllocator* aTokenAllocator); - - nsDeque mTokenDeque; - PRPackedBool mIsFinalChunk; - nsTokenAllocator* mTokenAllocator; - PRInt32 mTokenScanPos; - PRUint32 mFlags; - eHTMLTags mPreserveTarget; // Tag whose content is preserved -}; - -extern nsresult NS_NewHTMLTokenizer(nsITokenizer** aInstancePtrResult, - PRInt32 aMode,eParserDocType aDocType, - eParserCommands aCommand); - -#endif - - diff --git a/htmlparser/src/nsHTMLTokens.cpp b/htmlparser/src/nsHTMLTokens.cpp deleted file mode 100644 index 45a9b443ec5d..000000000000 --- a/htmlparser/src/nsHTMLTokens.cpp +++ /dev/null @@ -1,2336 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include <ctype.h> -#include <time.h> -#include <stdio.h> -#include "nsScanner.h" -#include "nsToken.h" -#include "nsIAtom.h" -#include "nsHTMLTokens.h" -#include "prtypes.h" -#include "nsDebug.h" -#include "nsHTMLTags.h" -#include "nsHTMLEntities.h" -#include "nsCRT.h" -#include "nsReadableUtils.h" -#include "nsUnicharUtils.h" -#include "nsScanner.h" - - -static const PRUnichar sUserdefined[] = {'u', 's', 'e', 'r', 'd', 'e', 'f', - 'i', 'n', 'e', 'd', 0}; - -static const PRUnichar kAttributeTerminalChars[] = { - PRUnichar('&'), PRUnichar('\b'), PRUnichar('\t'), - PRUnichar('\n'), PRUnichar('\r'), PRUnichar(' '), - PRUnichar('>'), - PRUnichar(0) -}; - - -/************************************************************** - And now for the token classes... 
- **************************************************************/ - -/* - * constructor from tag id - * - * @update gess 3/25/98 - * @param - * @return - */ -CHTMLToken::CHTMLToken(eHTMLTags aTag) : CToken(aTag) { -} - - -CHTMLToken::~CHTMLToken() { - -} - -/* - * constructor from tag id - * - * @update gess 3/25/98 - * @param - * @return - */ -CStartToken::CStartToken(eHTMLTags aTag) : CHTMLToken(aTag) { - mEmpty=PR_FALSE; - mContainerInfo=eFormUnknown; -#ifdef DEBUG - mAttributed = PR_FALSE; -#endif -} - -CStartToken::CStartToken(const nsAString& aName) : CHTMLToken(eHTMLTag_unknown) { - mEmpty=PR_FALSE; - mContainerInfo=eFormUnknown; - mTextValue.Assign(aName); -#ifdef DEBUG - mAttributed = PR_FALSE; -#endif -} - -CStartToken::CStartToken(const nsAString& aName,eHTMLTags aTag) : CHTMLToken(aTag) { - mEmpty=PR_FALSE; - mContainerInfo=eFormUnknown; - mTextValue.Assign(aName); -#ifdef DEBUG - mAttributed = PR_FALSE; -#endif -} - -/* - * This method returns the typeid (the tag type) for this token. - * - * @update gess 3/25/98 - * @param - * @return - */ -PRInt32 CStartToken::GetTypeID(){ - if(eHTMLTag_unknown==mTypeID) { - mTypeID = nsHTMLTags::LookupTag(mTextValue); - } - return mTypeID; -} - -/* - * - * - * @update gess 3/25/98 - * @param - * @return - */ -PRInt32 CStartToken::GetTokenType(void) { - return eToken_start; -} - -/* - * - * - * @update gess 3/25/98 - * @param - * @return - */ -void CStartToken::SetEmpty(PRBool aValue) { - mEmpty=aValue; -} - -/* - * - * - * @update gess 3/25/98 - * @param - * @return - */ -PRBool CStartToken::IsEmpty(void) { - return mEmpty; -} - - -/* - * Consume the identifier portion of the start tag - * - * @update gess 3/25/98 - * @param aChar -- last char consumed from stream - * @param aScanner -- controller of underlying input source - * @param aFlag - contains information such as |dtd mode|view mode|doctype|etc... 
- * @return error result - */ -nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) { - - //if you're here, we've already Consumed the < char, and are - //ready to Consume the rest of the open tag identifier. - //Stop consuming as soon as you see a space or a '>'. - //NOTE: We don't Consume the tag attributes here, nor do we eat the ">" - - nsresult result=NS_OK; - if (aFlag & NS_IPARSER_FLAG_HTML) { - nsAutoString theSubstr; - result=aScanner.ReadTagIdentifier(theSubstr); - mTypeID = (PRInt32)nsHTMLTags::LookupTag(theSubstr); - // Save the original tag string if this is user-defined or if we - // are viewing source - if(eHTMLTag_userdefined==mTypeID || (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { - mTextValue=theSubstr; - } - } - else { - //added PR_TRUE to readId() call below to fix bug 46083. The problem was that the tag given - //was written <title_> but since we didn't respect the '_', we only saw <title>. Then - //we searched for end title, which never comes (they give </title_>). - - result=aScanner.ReadTagIdentifier(mTextValue); - mTypeID = nsHTMLTags::LookupTag(mTextValue); - } - - if (NS_SUCCEEDED(result) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { - result = aScanner.SkipWhitespace(mNewlineCount); - } - - return result; -} - - -const nsAString& CStartToken::GetStringValue() -{ - if((eHTMLTag_unknown<mTypeID) && (mTypeID<eHTMLTag_text)) { - if(!mTextValue.Length()) { - mTextValue.Assign(nsHTMLTags::GetStringValue((nsHTMLTag) mTypeID)); - } - } - return mTextValue; -} - -/* - * - * - * @update gess 3/25/98 - * @param anOutputString will recieve the result - * @return nada - */ -void CStartToken::GetSource(nsString& anOutputString){ - anOutputString.Truncate(); - AppendSourceTo(anOutputString); -} - -/* - * - * - * @update harishd 03/23/00 - * @param result appended to the output string. 
- * @return nada - */ -void CStartToken::AppendSourceTo(nsAString& anOutputString){ - anOutputString.Append(PRUnichar('<')); - /* - * Watch out for Bug 15204 - */ - if(!mTrailingContent.IsEmpty()) - anOutputString.Append(mTrailingContent); - else { - if(!mTextValue.IsEmpty()) - anOutputString.Append(mTextValue); - else - anOutputString.Append(GetTagName(mTypeID)); - anOutputString.Append(PRUnichar('>')); - } -} - -/* - * constructor from tag id - * - * @update gess 3/25/98 - * @param - * @return - */ -CEndToken::CEndToken(eHTMLTags aTag) : CHTMLToken(aTag) { -} - -CEndToken::CEndToken(const nsAString& aName) : CHTMLToken(eHTMLTag_unknown) { - mTextValue.Assign(aName); -} - -CEndToken::CEndToken(const nsAString& aName,eHTMLTags aTag) : CHTMLToken(aTag) { - mTextValue.Assign(aName); -} - -/* - * Consume the identifier portion of the end tag - * - * @update gess 3/25/98 - * @param aChar -- last char consumed from stream - * @param aScanner -- controller of underlying input source - * @param aFlag - contains information such as |dtd mode|view mode|doctype|etc... 
- * @return error result - */ -nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) -{ - nsresult result = NS_OK; - if (aFlag & NS_IPARSER_FLAG_HTML) { - nsAutoString theSubstr; - result=aScanner.ReadTagIdentifier(theSubstr); - NS_ENSURE_SUCCESS(result, result); - - mTypeID = (PRInt32)nsHTMLTags::LookupTag(theSubstr); - // Save the original tag string if this is user-defined or if we - // are viewing source - if(eHTMLTag_userdefined==mTypeID || - (aFlag & (NS_IPARSER_FLAG_VIEW_SOURCE | NS_IPARSER_FLAG_PRESERVE_CONTENT))) { - mTextValue=theSubstr; - } - } - else { - result = aScanner.ReadTagIdentifier(mTextValue); - NS_ENSURE_SUCCESS(result, result); - - mTypeID = nsHTMLTags::LookupTag(mTextValue); - } - - if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { - result = aScanner.SkipWhitespace(mNewlineCount); - NS_ENSURE_SUCCESS(result, result); - } - - return result; -} - - -/* - * Asks the token to determine the <i>HTMLTag type</i> of - * the token. This turns around and looks up the tag name - * in the tag dictionary. 
- * - * @update gess 3/25/98 - * @param - * @return eHTMLTag id of this endtag - */ -PRInt32 CEndToken::GetTypeID(){ - if(eHTMLTag_unknown==mTypeID) { - mTypeID = nsHTMLTags::LookupTag(mTextValue); - switch(mTypeID) { - case eHTMLTag_dir: - case eHTMLTag_menu: - mTypeID=eHTMLTag_ul; - break; - default: - break; - } - } - return mTypeID; -} - -/* - * - * - * @update gess 3/25/98 - * @param - * @return - */ -PRInt32 CEndToken::GetTokenType(void) { - return eToken_end; -} - -const nsAString& CEndToken::GetStringValue() -{ - if((eHTMLTag_unknown<mTypeID) && (mTypeID<eHTMLTag_text)) { - if(!mTextValue.Length()) { - mTextValue.Assign(nsHTMLTags::GetStringValue((nsHTMLTag) mTypeID)); - } - } - return mTextValue; -} - -/* - * - * - * @update gess 3/25/98 - * @param anOutputString will recieve the result - * @return nada - */ -void CEndToken::GetSource(nsString& anOutputString){ - anOutputString.Truncate(); - AppendSourceTo(anOutputString); -} - -/* - * - * - * @update harishd 03/23/00 - * @param result appended to the output string. 
- * @return nada - */ -void CEndToken::AppendSourceTo(nsAString& anOutputString){ - anOutputString.Append(NS_LITERAL_STRING("</")); - if(!mTextValue.IsEmpty()) - anOutputString.Append(mTextValue); - else - anOutputString.Append(GetTagName(mTypeID)); - anOutputString.Append(PRUnichar('>')); -} - -/* - * default constructor - * - * @update gess 3/25/98 - * @param aName -- string to init token name with - * @return - */ -CTextToken::CTextToken() : CHTMLToken(eHTMLTag_text) { -} - - -/* - * string based constructor - * - * @update gess 3/25/98 - * @param aName -- string to init token name with - * @return - */ -CTextToken::CTextToken(const nsAString& aName) : CHTMLToken(eHTMLTag_text) { - mTextValue.Rebind(aName); -} - -/* - * - * - * @update gess 3/25/98 - * @param - * @return - */ -PRInt32 CTextToken::GetTokenType(void) { - return eToken_text; -} - -PRInt32 CTextToken::GetTextLength(void) { - return mTextValue.Length(); -} - -/* - * Consume as much clear text from scanner as possible. - * - * @update gess 3/25/98 - * @param aChar -- last char consumed from stream - * @param aScanner -- controller of underlying input source - * @return error result - */ -nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) { - static const PRUnichar theTerminalsChars[] = - { PRUnichar('\n'), PRUnichar('\r'), PRUnichar('&'), PRUnichar('<'), - PRUnichar(0) }; - static const nsReadEndCondition theEndCondition(theTerminalsChars); - nsresult result=NS_OK; - PRBool done=PR_FALSE; - nsScannerIterator origin, start, end; - - // Start scanning after the first character, because we know it to - // be part of this text token (we wouldn't have come here if it weren't) - aScanner.CurrentPosition(origin); - start = origin; - ++start; - aScanner.SetPosition(start); - aScanner.EndReading(end); - - while((NS_OK==result) && (!done)) { - result=aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE); - if(NS_OK==result) { - result=aScanner.Peek(aChar); - - if(((kCR==aChar) 
|| (kNewLine==aChar)) && (NS_OK==result)) { - result=aScanner.GetChar(aChar); //strip off the char - PRUnichar theNextChar; - result=aScanner.Peek(theNextChar); //then see what's next. - switch(aChar) { - case kCR: - // result=aScanner.GetChar(aChar); - if(kLF==theNextChar) { - // If the "\r" is followed by a "\n", don't replace it and - // let it be ignored by the layout system - end.advance(2); - result=aScanner.GetChar(theNextChar); - } - else { - // If it standalone, replace the "\r" with a "\n" so that - // it will be considered by the layout system - aScanner.ReplaceCharacter(end, kLF); - ++end; - } - ++mNewlineCount; - break; - case kLF: - ++end; - ++mNewlineCount; - break; - } //switch - } - else done=PR_TRUE; - } - } - - aScanner.BindSubstring(mTextValue, origin, end); - - return result; -} - -/* - * Consume as much clear text from scanner as possible. - * - * @update gess 3/25/98 - * @param aChar -- last char consumed from stream - * @param aScanner -- controller of underlying input source - * @return error result - */ -nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner, - nsString& aEndTagName,PRInt32 aFlag,PRBool& aFlushTokens){ - nsresult result=NS_OK; - nsScannerIterator theStartOffset, theCurrOffset, theTermStrPos, theStartCommentPos, theAltTermStrPos, endPos; - PRBool done=PR_FALSE; - PRBool theLastIteration=PR_FALSE; - - aScanner.CurrentPosition(theStartOffset); - theCurrOffset = theStartOffset; - aScanner.EndReading(endPos); - theTermStrPos = theStartCommentPos = theAltTermStrPos = endPos; - - // ALGORITHM: *** The performance is based on correctness of the document *** - // 1. Look for a '<' character. This could be - // a) Start of a comment (<!--), b) Start of the terminal string, or c) a start of a tag. - // We are interested in a) and b). c) is ignored because in CDATA we don't care for tags. - // NOTE: Technically speaking in CDATA we should ignore the comments too!! 
But for compatibility - // we don't. - // 2. Having the offset, for '<', search for the terminal string from there on and record its offset. - // 3. From the same '<' offset also search for start of a comment '<!--'. If found search for - // end comment '-->' between the terminal string and '<!--'. If you did not find the end - // comment, then we have a malformed document, i.e., this section has a prematured terminal string - // Ex. <SCRIPT><!-- document.write('</SCRIPT>') //--> </SCRIPT>. But anyway record terminal string's - // offset and update the current offset to the terminal string (prematured) offset and goto step 1. - // 4. Amen...If you found a terminal string and '-->'. Otherwise goto step 1. - // 5. If the end of the document is reached and if we still don't have the condition in step 4. then - // assume that the prematured terminal string is the actual terminal string and goto step 1. This - // will be our last iteration. - - const NS_NAMED_LITERAL_STRING(ltslash, "</"); - const nsString theTerminalString = ltslash + aEndTagName; - - PRUint32 termStrLen=theTerminalString.Length(); - while((result == NS_OK) && !done) { - PRBool found = PR_FALSE; - nsScannerIterator gtOffset,ltOffset = theCurrOffset; - while (FindCharInReadable(PRUnichar(kLessThan), ltOffset, endPos) && - ((PRUint32)ltOffset.size_forward() >= termStrLen || - Distance(ltOffset, endPos) >= termStrLen)) { - // Make a copy of the (presumed) end tag and - // do a case-insensitive comparison - - nsScannerIterator start(ltOffset), end(ltOffset); - end.advance(termStrLen); - - if (CaseInsensitiveFindInReadable(theTerminalString,start,end) && - end != endPos && (*end == '>' || *end == ' ' || - *end == '\t' || *end == '\n' || - *end == '\r' || *end == '\b')) { - gtOffset = end; - if (FindCharInReadable(PRUnichar(kGreaterThan), gtOffset, endPos)) { - found = PR_TRUE; - theTermStrPos = start; - } - break; - } - ltOffset.advance(1); - } - - if (found && theTermStrPos != endPos) { - if(!(aFlag & 
NS_IPARSER_FLAG_STRICT_MODE) && - !theLastIteration && !aIgnoreComments) { - nsScannerIterator endComment(ltOffset); - endComment.advance(5); - - if ((theStartCommentPos == endPos) && - FindInReadable(NS_LITERAL_STRING("<!--"), theCurrOffset, endComment)) { - theStartCommentPos = theCurrOffset; - } - - if (theStartCommentPos != endPos) { - // Search for --> between <!-- and </TERMINALSTRING>. - theCurrOffset = theStartCommentPos; - nsScannerIterator terminal(theTermStrPos); - if (!RFindInReadable(NS_LITERAL_STRING("-->"), - theCurrOffset, terminal)) { - // If you're here it means that we have a bogus terminal string. - // Even though it is bogus, the position of the terminal string - // could be helpful in case we hit the rock bottom. - theAltTermStrPos = theTermStrPos; - - // We did not find '-->' so keep searching for terminal string. - theCurrOffset = theTermStrPos; - theCurrOffset.advance(termStrLen); - continue; - } - } - } - - // Make sure to preserve the end tag's representation if needed - if(aFlag & (NS_IPARSER_FLAG_VIEW_SOURCE | NS_IPARSER_FLAG_PRESERVE_CONTENT)) { - CopyUnicodeTo(ltOffset.advance(2),gtOffset,aEndTagName); - } - - aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos); - aScanner.SetPosition(gtOffset.advance(1)); - - // We found </SCRIPT>...permit flushing -> Ref: Bug 22485 - aFlushTokens=PR_TRUE; - done = PR_TRUE; - } - else { - // We end up here if: - // a) when the buffer runs out ot data. - // b) when the terminal string is not found. - if(!aScanner.IsIncremental()) { - if(theAltTermStrPos != endPos) { - // If you're here it means..we hit the rock bottom and therefore switch to plan B. - theCurrOffset = theAltTermStrPos; - theLastIteration = PR_TRUE; - } - else { - done = PR_TRUE; // Do this to fix Bug. 
35456 - } - } - else { - result=kEOF; - } - } - } - return result; -} - -void CTextToken::CopyTo(nsAString& aStr) -{ - nsScannerIterator start, end; - mTextValue.BeginReading(start); - mTextValue.EndReading(end); - CopyUnicodeTo(start, end, aStr); -} - -const nsAString& CTextToken::GetStringValue(void) -{ - return mTextValue.AsString(); -} - -void CTextToken::Bind(nsScanner* aScanner, nsScannerIterator& aStart, nsScannerIterator& aEnd) -{ - aScanner->BindSubstring(mTextValue, aStart, aEnd); -} - -void CTextToken::Bind(const nsAString& aStr) -{ - mTextValue.Rebind(aStr); -} - -/* - * default constructor - * - * @update vidur 11/12/98 - * @param aName -- string to init token name with - * @return - */ -CCDATASectionToken::CCDATASectionToken(eHTMLTags aTag) : CHTMLToken(aTag) { -} - - -/* - * string based constructor - * - * @update vidur 11/12/98 - * @param aName -- string to init token name with - * @return - */ -CCDATASectionToken::CCDATASectionToken(const nsAString& aName) : CHTMLToken(eHTMLTag_unknown) { - mTextValue.Assign(aName); -} - -/* - * - * @update vidur 11/12/98 - * @param - * @return - */ -PRInt32 CCDATASectionToken::GetTokenType(void) { - return eToken_cdatasection; -} - -/* - * Consume as much marked test from scanner as possible. - * - * @update rgess 12/15/99: had to handle case: "<![ ! IE 5]>", in addition to "<![..[..]]>". 
- * @param aChar -- last char consumed from stream - * @param aScanner -- controller of underlying input source - * @return error result - */ -nsresult CCDATASectionToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) { - static const PRUnichar theTerminalsChars[] = - { PRUnichar('\r'), PRUnichar('\n'), PRUnichar(']'), PRUnichar(0) }; - static const nsReadEndCondition theEndCondition(theTerminalsChars); - nsresult result=NS_OK; - PRBool done=PR_FALSE; - - while((NS_OK==result) && (!done)) { - result=aScanner.ReadUntil(mTextValue,theEndCondition,PR_FALSE); - if(NS_OK==result) { - result=aScanner.Peek(aChar); - if((kCR==aChar) && (NS_OK==result)) { - result=aScanner.GetChar(aChar); //strip off the \r - result=aScanner.Peek(aChar); //then see what's next. - if(NS_OK==result) { - switch(aChar) { - case kCR: - result=aScanner.GetChar(aChar); //strip off the \r - mTextValue.Append(NS_LITERAL_STRING("\n\n")); - mNewlineCount += 2; - break; - case kNewLine: - //which means we saw \r\n, which becomes \n - result=aScanner.GetChar(aChar); //strip off the \n - //now fall through on purpose... - default: - mTextValue.Append(NS_LITERAL_STRING("\n")); - mNewlineCount++; - break; - } //switch - } //if - } - else if (kNewLine == aChar) { - result=aScanner.GetChar(aChar); - mTextValue.Append(aChar); - ++mNewlineCount; - } - else if (kRightSquareBracket == aChar) { - result=aScanner.GetChar(aChar); //strip off the ] - mTextValue.Append(aChar); - result=aScanner.Peek(aChar); //then see what's next. - if((NS_OK==result) && (kRightSquareBracket==aChar)) { - result=aScanner.GetChar(aChar); //strip off the second ] - mTextValue.Append(aChar); - } - // The goal here is to not lose data from the page when encountering - // markup like: <![endif]-->. This means that in normal parsing, we - // allow ']' to end the marked section and just drop everything between - // it an the '>'. In view-source mode, we cannot drop things on the - // floor like that. 
In fact, to make view-source of XML with script in - // CDATA sections at all bearable, we need to somewhat enforce the ']>' - // terminator for marked sections. So make the tokenization somewhat - // different when in view-source _and_ dealing with a CDATA section. - PRBool inCDATA = (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) && - StringBeginsWith(mTextValue, NS_LITERAL_STRING("[CDATA[")); - if (inCDATA) { - result = aScanner.Peek(aChar); - } else { - nsAutoString dummy; // skip any bad data - result=aScanner.ReadUntil(dummy,kGreaterThan,PR_FALSE); - } - if (NS_OK==result && - (!inCDATA || kGreaterThan == aChar)) { - result=aScanner.GetChar(aChar); //strip off the > - done=PR_TRUE; - } - } - else done=PR_TRUE; - } - } - return result; -} - -const nsAString& CCDATASectionToken::GetStringValue(void) -{ - return mTextValue; -} - - -/* - * default constructor - * - * @param aName -- string to init token name with - * @return - */ -CMarkupDeclToken::CMarkupDeclToken() : CHTMLToken(eHTMLTag_markupDecl) { -} - - -/* - * string based constructor - * - * @param aName -- string to init token name with - * @return - */ -CMarkupDeclToken::CMarkupDeclToken(const nsAString& aName) : CHTMLToken(eHTMLTag_markupDecl) { - mTextValue.Rebind(aName); -} - - -/* - * - * @param - * @return - */ -PRInt32 CMarkupDeclToken::GetTokenType(void) { - return eToken_markupDecl; -} - -/* - * Consume as much declaration from scanner as possible. - * Declaration is a markup declaration of ELEMENT, ATTLIST, ENTITY or - * NOTATION, which can span multiple lines and ends in >. 
- * - * @param aChar -- last char consumed from stream - * @param aScanner -- controller of underlying input source - * @return error result - */ -nsresult CMarkupDeclToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) { - static const PRUnichar theTerminalsChars[] = - { PRUnichar('\n'), PRUnichar('\r'), PRUnichar('\''), PRUnichar('"'), - PRUnichar('>'), - PRUnichar(0) }; - static const nsReadEndCondition theEndCondition(theTerminalsChars); - nsresult result=NS_OK; - PRBool done=PR_FALSE; - PRUnichar quote=0; - - nsScannerIterator origin, start, end; - aScanner.CurrentPosition(origin); - start = origin; - - while((NS_OK==result) && (!done)) { - aScanner.SetPosition(start); - result=aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE); - if(NS_OK==result) { - result=aScanner.Peek(aChar); - - if(NS_OK==result) { - PRUnichar theNextChar=0; - if ((kCR==aChar) || (kNewLine==aChar)) { - result=aScanner.GetChar(aChar); //strip off the char - result=aScanner.Peek(theNextChar); //then see what's next. 
- } - switch(aChar) { - case kCR: - // result=aScanner.GetChar(aChar); - if(kLF==theNextChar) { - // If the "\r" is followed by a "\n", don't replace it and - // let it be ignored by the layout system - end.advance(2); - result=aScanner.GetChar(theNextChar); - } - else { - // If it standalone, replace the "\r" with a "\n" so that - // it will be considered by the layout system - aScanner.ReplaceCharacter(end, kLF); - ++end; - } - ++mNewlineCount; - break; - case kLF: - ++end; - ++mNewlineCount; - break; - case '\'': - case '"': - ++end; - if (quote) { - if (quote == aChar) { - quote = 0; - } - } else { - quote = aChar; - } - break; - case kGreaterThan: - if (quote) { - ++end; - } else { - start = end; - ++start; // Note that start is wrong after this, we just avoid temp var - aScanner.SetPosition(start); // Skip the > - done=PR_TRUE; - } - break; - default: - NS_ABORT_IF_FALSE(0,"should not happen, switch is missing cases?"); - break; - } //switch - start = end; - } - else done=PR_TRUE; - } // if read until !ok - } // while - - aScanner.BindSubstring(mTextValue, origin, end); - - return result; -} - -const nsAString& CMarkupDeclToken::GetStringValue(void) -{ - return mTextValue.AsString(); -} - - -/* - * Default constructor - * - * @update gess 3/25/98 - * @param aName -- string to init token name with - * @return - */ -CCommentToken::CCommentToken() : CHTMLToken(eHTMLTag_comment) { -} - - -/* - * Copy constructor - * - * @update gess 3/25/98 - * @param - * @return - */ -CCommentToken::CCommentToken(const nsAString& aName) : CHTMLToken(eHTMLTag_comment) { - mComment.Rebind(aName); -} - -void CCommentToken::AppendSourceTo(nsAString& anOutputString){ - AppendUnicodeTo(mCommentDecl, anOutputString); -} - -static PRBool IsCommentEnd( - const nsScannerIterator& aCurrent, - const nsScannerIterator& aEnd, - nsScannerIterator& aGt) -{ - nsScannerIterator current = aCurrent; - PRInt32 dashes = 0; - - while ((current != aEnd) && (dashes != 2)) { - if (*current == 
kGreaterThan) { - aGt = current; - return PR_TRUE; - } - if (*current == PRUnichar('-')) { - ++dashes; - } else { - dashes = 0; - } - ++current; - } - - return PR_FALSE; -} - -nsresult CCommentToken::ConsumeStrictComment(nsScanner& aScanner) -{ - // <!--[... -- ... -- ...]*--> - /********************************************************* - NOTE: This algorithm does a fine job of handling comments - when they're formatted per spec, but if they're not - we don't handle them well. - *********************************************************/ - nsScannerIterator end, current, gt, lt; - aScanner.EndReading(end); - aScanner.CurrentPosition(current); - - nsScannerIterator beginData = end; - - lt = current; - lt.advance(-2); // <! - - // Regular comment must start with <!-- - if (current != end && *current == kMinus && - ++current != end && *current == kMinus && - ++current != end) { - nsScannerIterator currentEnd = end; - PRBool balancedComment = PR_FALSE; - static NS_NAMED_LITERAL_STRING(dashes,"--"); - beginData = current; - - while (FindInReadable(dashes, current, currentEnd)) { - current.advance(2); - - balancedComment = !balancedComment; // We need to match '--' with '--' - - if (balancedComment && IsCommentEnd(current, end, gt)) { - // done - current.advance(-2); - if (beginData != current) { // protects from <!----> - aScanner.BindSubstring(mComment, beginData, current); - } - aScanner.BindSubstring(mCommentDecl, lt, ++gt); - aScanner.SetPosition(gt); - return NS_OK; - } else { - // Continue after the last '--' - currentEnd = end; - } - } - } - - // If beginData == end, we did not find opening '--' - if (beginData == end) { - // This might have been empty comment: <!> - // Or it could have been something completely bogus like: <!This is foobar> - // Handle both cases below - aScanner.CurrentPosition(current); - beginData = current; - if (FindCharInReadable('>', current, end)) { - aScanner.BindSubstring(mComment, beginData, current); - 
aScanner.BindSubstring(mCommentDecl, lt, ++current); - aScanner.SetPosition(current); - return NS_OK; - } - } - - if (aScanner.IsIncremental()) { - // We got here because we saw the beginning of a comment, - // but not yet the end, and we are still loading the page. In that - // case the return value here will cause us to unwind, - // wait for more content, and try again. - // XXX For performance reasons we should cache where we were, and - // continue from there for next call - return kEOF; // not really an nsresult, but... - } - - // XXX We should return kNotAComment, parse comment open as text, and parse - // the rest of the document normally. Now we ALMOST do that: <! is - // missing from the content model. - return NS_OK; -} - -nsresult CCommentToken::ConsumeQuirksComment(nsScanner& aScanner) -{ - // <![-[-]] ... [[-]-|--!]> - /********************************************************* - NOTE: This algorithm does a fine job of handling comments - commonly used, but it doesn't really consume them - per spec (But then, neither does IE or Nav). - *********************************************************/ - nsScannerIterator end, current; - aScanner.EndReading(end); - aScanner.CurrentPosition(current); - nsScannerIterator beginData = current, - beginLastMinus = end, - bestAltCommentEnd = end, - lt = current; - lt.advance(-2); // <! - - // When we get here, we have always already consumed <! 
- // Skip over possible leading minuses - if (current != end && *current == kMinus) { - beginLastMinus = current; - ++current; - ++beginData; - if (current != end && *current == kMinus) { // <!-- - beginLastMinus = current; - ++current; - ++beginData; - // Long form comment - - nsScannerIterator currentEnd = end, gt = end; - - // Find the end of the comment - while (FindCharInReadable(kGreaterThan, current, currentEnd)) { - gt = current; - if (bestAltCommentEnd == end) { - bestAltCommentEnd = gt; - } - --current; - PRBool goodComment = PR_FALSE; - if (current != beginLastMinus && *current == kMinus) { // -> - --current; - if (current != beginLastMinus && *current == kMinus) { // --> - goodComment = PR_TRUE; - --current; - } - } else if (current != beginLastMinus && *current == '!') { - --current; - if (current != beginLastMinus && *current == kMinus) { - --current; - if (current != beginLastMinus && *current == kMinus) { // --!> - --current; - goodComment = PR_TRUE; - } - } - } else if (current == beginLastMinus) { - goodComment = PR_TRUE; - } - - if (goodComment) { - // done - if (beginLastMinus != current) { // protects from <!----> - aScanner.BindSubstring(mComment, beginData, ++current); - } - aScanner.BindSubstring(mCommentDecl, lt, ++gt); - aScanner.SetPosition(gt); - return NS_OK; - } else { - // try again starting after the last '>' - current = ++gt; - currentEnd = end; - } - } //while - - if (aScanner.IsIncremental()) { - // We got here because we saw the beginning of a comment, - // but not yet the end, and we are still loading the page. In that - // case the return value here will cause us to unwind, - // wait for more content, and try again. - // XXX For performance reasons we should cache where we were, and - // continue from there for next call - return kEOF; // not really an nsresult, but... - } - - // If you're here, then we're in a special state. 
- // The problem at hand is that we've hit the end of the document without finding the normal endcomment delimiter "-->". - // In this case, the first thing we try is to see if we found an alternate endcomment delimiter ">". - // If so, rewind just pass that, and use everything up to that point as your comment. - // If not, the document has no end comment and should be treated as one big comment. - gt = bestAltCommentEnd; - if (beginData != gt) { // protects from <!--> - aScanner.BindSubstring(mComment, beginData, gt); - } - if (gt != end) { - ++gt; - } - aScanner.BindSubstring(mCommentDecl, lt, gt); - aScanner.SetPosition(gt); - return NS_OK; - } - } - - // This could be short form of comment - // Find the end of the comment - current = beginData; - if (FindCharInReadable(kGreaterThan, current, end)) { - nsScannerIterator gt = current; - if (current != beginData) { - --current; - if (current != beginData && *current == kMinus) { // -> - --current; - if (current != beginData && *current == kMinus) { // --> - --current; - } - } else if (current != beginData && *current == '!') { // !> - --current; - if (current != beginData && *current == kMinus) { // -!> - --current; - if (current != beginData && *current == kMinus) { // --!> - --current; - } - } - } - } - - if (current != gt) { - aScanner.BindSubstring(mComment, beginData, ++current); - } - aScanner.BindSubstring(mCommentDecl, lt, ++gt); - aScanner.SetPosition(gt); - return NS_OK; - } - - return kEOF; // not really an nsresult, but... -} - -/* - * Consume the identifier portion of the comment. - * Note that we've already eaten the "<!" portion. 
- * - * @update gess 16June2000 - * @param aChar -- last char consumed from stream - * @param aScanner -- controller of underlying input source - * @return error result - */ -nsresult CCommentToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) { - nsresult result=PR_TRUE; - - if (aFlag & NS_IPARSER_FLAG_STRICT_MODE) { - //Enabling strict comment parsing for Bug 53011 and 2749 contradicts!!!! - result = ConsumeStrictComment(aScanner); - } - else { - result = ConsumeQuirksComment(aScanner); - } - - if (NS_SUCCEEDED(result)) { - mNewlineCount = !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) ? mCommentDecl.CountChar(kNewLine) : -1; - } - - return result; -} - -const nsAString& CCommentToken::GetStringValue(void) -{ - return mComment.AsString(); -} - -/* - * - * - * @update gess 3/25/98 - * @param - * @return - */ -PRInt32 CCommentToken::GetTokenType(void) { - return eToken_comment; -} - -/* - * default constructor - * - * @update gess 3/25/98 - * @param aName -- string to init token name with - * @return - */ -CNewlineToken::CNewlineToken() : CHTMLToken(eHTMLTag_newline) { -} - -/* - * - * - * @update gess 3/25/98 - * @param - * @return - */ -PRInt32 CNewlineToken::GetTokenType(void) { - return eToken_newline; -} - - -static nsScannerSubstring* gNewlineStr; -void CNewlineToken::AllocNewline() -{ - gNewlineStr = new nsScannerSubstring(NS_LITERAL_STRING("\n")); -} - -void CNewlineToken::FreeNewline() -{ - if (gNewlineStr) { - delete gNewlineStr; - gNewlineStr = nsnull; - } -} - -/** - * This method retrieves the value of this internal string. - * - * @update gess 3/25/98 - * @return nsString reference to internal string value - */ -const nsAString& CNewlineToken::GetStringValue(void) { - return gNewlineStr->AsString(); -} - -/* - * Consume as many cr/lf pairs as you can find. 
- * - * @update gess 3/25/98 - * @param aChar -- last char consumed from stream - * @param aScanner -- controller of underlying input source - * @return error result - */ -nsresult CNewlineToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) { - -/******************************************************************* - - Here's what the HTML spec says about newlines: - - "A line break is defined to be a carriage return ( ), - a line feed ( ), or a carriage return/line feed pair. - All line breaks constitute white space." - - *******************************************************************/ - - PRUnichar theChar; - nsresult result=aScanner.Peek(theChar); - - if(NS_OK==result) { - switch(aChar) { - case kNewLine: - if(kCR==theChar) { - result=aScanner.GetChar(theChar); - } - break; - case kCR: - //convert CRLF into just CR - if(kNewLine==theChar) { - result=aScanner.GetChar(theChar); - } - break; - default: - break; - } - } - - mNewlineCount = 1; - return result; -} - -/* - * default constructor - * - * @update gess 3/25/98 - * @param aName -- string to init token name with - * @return - */ -CAttributeToken::CAttributeToken() : CHTMLToken(eHTMLTag_unknown) { - mHasEqualWithoutValue=PR_FALSE; -#ifdef DEBUG - mLastAttribute = PR_FALSE; -#endif -} - -/* - * string based constructor - * - * @update gess 3/25/98 - * @param aName -- string value to init token name with - * @return - */ -CAttributeToken::CAttributeToken(const nsAString& aName) : CHTMLToken(eHTMLTag_unknown) { - mTextValue.Assign(aName); - mHasEqualWithoutValue=PR_FALSE; -#ifdef DEBUG - mLastAttribute = PR_FALSE; -#endif -} - -/* - * construct initializing data to - * key value pair - * - * @update gess 3/25/98 - * @param aName -- string value to init token name with - * @return - */ -CAttributeToken::CAttributeToken(const nsAString& aKey, const nsAString& aName) : CHTMLToken(eHTMLTag_unknown) { - mTextValue.Assign(aName); - mTextKey.Rebind(aKey); - mHasEqualWithoutValue=PR_FALSE; -#ifdef DEBUG 
- mLastAttribute = PR_FALSE; -#endif -} - -/* - * - * - * @update gess 3/25/98 - * @param - * @return - */ -PRInt32 CAttributeToken::GetTokenType(void) { - return eToken_attribute; -} - -/* - * Removes non-alpha-non-digit characters from the end of a KEY - * - * @update harishd 07/15/99 - * @param - * @return - */ -void CAttributeToken::SanitizeKey() { - PRInt32 length=mTextKey.Length(); - if(length > 0) { - nsScannerIterator iter, begin, end; - mTextKey.BeginReading(begin); - mTextKey.EndReading(end); - iter = end; - - // Look for the first legal character starting from - // the end of the string - do { - --iter; - } while (!nsCRT::IsAsciiAlpha(*iter) && - !nsCRT::IsAsciiDigit(*iter) && - (iter != begin)); - - // If there were any illegal characters, just copy out the - // legal part - if (iter != --end) { - nsAutoString buf; - CopyUnicodeTo(begin, ++iter, buf); - mTextKey.Rebind(buf); - } - } - - return; -} - -const nsAString& CAttributeToken::GetKey(void) -{ - return mTextKey.AsString(); -} - -const nsAString& CAttributeToken::GetStringValue(void) -{ - return mTextValue; -} - -/* - * - * - * @update rickg 6June2000 - * @param anOutputString will recieve the result - * @return nada - */ -void CAttributeToken::GetSource(nsString& anOutputString){ - anOutputString.Truncate(); - AppendSourceTo(anOutputString); -} - -/* - * - * - * @update rickg 6June2000 - * @param result appended to the output string. - * @return nada - */ -void CAttributeToken::AppendSourceTo(nsAString& anOutputString){ - AppendUnicodeTo(mTextKey, anOutputString); - if(mTextValue.Length() || mHasEqualWithoutValue) - anOutputString.Append(NS_LITERAL_STRING("=")); - anOutputString.Append(mTextValue); - // anOutputString.Append(NS_LITERAL_STRING(";")); -} - -static void AppendNCR(nsString& aString, PRInt32 aNCRValue); -/* - * @param aScanner -- controller of underlying input source - * @param aFlag -- If NS_IPARSER_FLAG_VIEW_SOURCE do not reduce entities... 
- * @return error result - * - */ -static -nsresult ConsumeAttributeEntity(nsString& aString, - nsScanner& aScanner, - PRInt32 aFlag) -{ - - nsresult result=NS_OK; - - PRUnichar ch; - result=aScanner.Peek(ch, 1); - - if (NS_SUCCEEDED(result)) { - PRUnichar amp=0; - PRInt32 theNCRValue=0; - nsAutoString entity; - - if (nsCRT::IsAsciiAlpha(ch) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { - result=CEntityToken::ConsumeEntity(ch,entity,aScanner); - if (NS_SUCCEEDED(result)) { - theNCRValue = nsHTMLEntities::EntityToUnicode(entity); - PRUnichar theTermChar=entity.Last(); - // If an entity value is greater than 255 then: - // Nav 4.x does not treat it as an entity, - // IE treats it as an entity if terminated with a semicolon. - // Resembling IE!! - if(theNCRValue < 0 || (theNCRValue > 255 && theTermChar != ';')) { - // Looks like we're not dealing with an entity - aString.Append(kAmpersand); - aString.Append(entity); - } - else { - // A valid entity so reduce it. - aString.Append(PRUnichar(theNCRValue)); - } - } - } - else if (ch==kHashsign && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { - result=CEntityToken::ConsumeEntity(ch,entity,aScanner); - if (NS_SUCCEEDED(result)) { - if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) { - // Looked like an entity but it's not - aScanner.GetChar(amp); - aString.Append(amp); - result = NS_OK; // just being safe.. - } - else { - PRInt32 err; - theNCRValue=entity.ToInteger(&err,kAutoDetect); - AppendNCR(aString, theNCRValue); - } - } - } - else { - // What we thought as entity is not really an entity... - aScanner.GetChar(amp); - aString.Append(amp); - }//if - } - - return result; -} - -/* - * This general purpose method is used when you want to - * consume attributed text value. - * Note: It also reduces entities within attributes. 
- * - * @param aNewlineCount -- the newline count to increment when hitting newlines - * @param aScanner -- controller of underlying input source - * @param aTerminalChars -- characters that stop consuming attribute. - * @param aAllowNewlines -- whether to allow newlines in the value. - * XXX it would be nice to roll this info into - * aTerminalChars somehow.... - * @param aFlag - contains information such as |dtd mode|view mode|doctype|etc... - * @return error result - */ -static -nsresult ConsumeAttributeValueText(nsString& aString, - PRInt32& aNewlineCount, - nsScanner& aScanner, - const nsReadEndCondition& aEndCondition, - PRBool aAllowNewlines, - PRInt32 aFlag) -{ - nsresult result = NS_OK; - PRBool done = PR_FALSE; - - do { - result = aScanner.ReadUntil(aString,aEndCondition,PR_FALSE); - if(NS_SUCCEEDED(result)) { - PRUnichar ch; - aScanner.Peek(ch); - if(ch == kAmpersand) { - result = ConsumeAttributeEntity(aString,aScanner,aFlag); - } - else if(ch == kCR && aAllowNewlines) { - aScanner.GetChar(ch); - result = aScanner.Peek(ch); - if (NS_SUCCEEDED(result)) { - if(ch == kNewLine) { - aString.Append(NS_LITERAL_STRING("\r\n")); - aScanner.GetChar(ch); - } - else { - aString.Append(PRUnichar('\r')); - } - ++aNewlineCount; - } - } - else if(ch == kNewLine && aAllowNewlines) { - aScanner.GetChar(ch); - aString.Append(PRUnichar('\n')); - ++aNewlineCount; - } - else { - done = PR_TRUE; - } - } - } while (NS_SUCCEEDED(result) && !done); - - return result; -} - -/* - * This general purpose method is used when you want to - * consume a known quoted string. - * - * @param aScanner -- controller of underlying input source - * @param aTerminalChars -- characters that stop consuming attribute. - * @param aFlag - contains information such as |dtd mode|view mode|doctype|etc... 
- * @return error result - */ -static -nsresult ConsumeQuotedString(PRUnichar aChar, - nsString& aString, - PRInt32& aNewlineCount, - nsScanner& aScanner, - PRInt32 aFlag) -{ - NS_ASSERTION(aChar==kQuote || aChar==kApostrophe,"char is neither quote nor apostrophe"); - - static const PRUnichar theTerminalCharsQuote[] = { - PRUnichar(kQuote), PRUnichar('&'), PRUnichar(kCR), - PRUnichar(kNewLine), PRUnichar(0) }; - static const PRUnichar theTerminalCharsApostrophe[] = { - PRUnichar(kApostrophe), PRUnichar('&'), PRUnichar(kCR), - PRUnichar(kNewLine), PRUnichar(0) }; - static const nsReadEndCondition - theTerminateConditionQuote(theTerminalCharsQuote); - static const nsReadEndCondition - theTerminateConditionApostrophe(theTerminalCharsApostrophe); - - // Assume Quote to init to something - const nsReadEndCondition *terminateCondition = &theTerminateConditionQuote; - if (aChar==kApostrophe) - terminateCondition = &theTerminateConditionApostrophe; - - nsresult result=NS_OK; - nsScannerIterator theOffset; - aScanner.CurrentPosition(theOffset); - - result=ConsumeAttributeValueText(aString,aNewlineCount,aScanner, - *terminateCondition,PR_TRUE,aFlag); - - if(NS_SUCCEEDED(result)) { - result = aScanner.SkipOver(aChar); // aChar should be " or ' - } - - // Ref: Bug 35806 - // A back up measure when disaster strikes... - // Ex <table> <tr d="><td>hello</td></tr></table> - if(!aString.IsEmpty() && aString.Last()!=aChar && - !aScanner.IsIncremental() && result==kEOF) { - static const nsReadEndCondition - theAttributeTerminator(kAttributeTerminalChars); - aString.Truncate(); - aScanner.SetPosition(theOffset, PR_FALSE, PR_TRUE); - result=ConsumeAttributeValueText(aString,aNewlineCount,aScanner, - theAttributeTerminator,PR_FALSE,aFlag); - } - return result; -} - -/* - * Consume the key and value portions of the attribute. 
- * - * @update rickg 03.23.2000 - * @param aChar -- last char consumed from stream - * @param aScanner -- controller of underlying input source - * @param aFlag - contains information such as |dtd mode|view mode|doctype|etc... - * @return error result - */ -nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) { - - nsresult result; - - //I changed a bit of this method to use aRetain so that we do the right - //thing in viewsource. The ws/cr/lf sequences are now maintained, and viewsource looks good. - - nsScannerIterator wsstart, wsend; - - if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { - result = aScanner.ReadWhitespace(wsstart, wsend, mNewlineCount); - } - else { - result = aScanner.SkipWhitespace(mNewlineCount); - } - - if (NS_OK==result) { - static const PRUnichar theTerminalsChars[] = - { PRUnichar(' '), PRUnichar('"'), - PRUnichar('='), PRUnichar('\n'), - PRUnichar('\r'), PRUnichar('\t'), - PRUnichar('>'), PRUnichar('<'), - PRUnichar('\b'), PRUnichar(0) }; - static const nsReadEndCondition theEndCondition(theTerminalsChars); - - nsScannerIterator start, end; - result=aScanner.ReadUntil(start,end,theEndCondition,PR_FALSE); - - if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { - aScanner.BindSubstring(mTextKey, start, end); - } - - //now it's time to Consume the (optional) value... - if (NS_OK==result) { - if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { - result = aScanner.ReadWhitespace(start, wsend, mNewlineCount); - aScanner.BindSubstring(mTextKey, wsstart, wsend); - } - else { - result = aScanner.SkipWhitespace(mNewlineCount); - } - - if (NS_OK==result) { - result=aScanner.Peek(aChar); //Skip ahead until you find an equal sign or a '>'... - if (NS_OK==result) { - if (kEqual==aChar){ - result=aScanner.GetChar(aChar); //skip the equal sign... 
- if (NS_OK==result) { - if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { - result = aScanner.ReadWhitespace(mTextValue, mNewlineCount); - } - else { - result = aScanner.SkipWhitespace(mNewlineCount); - } - - if (NS_OK==result) { - result=aScanner.Peek(aChar); //and grab the next char. - if (NS_OK==result) { - if ((kQuote==aChar) || (kApostrophe==aChar)) { - aScanner.GetChar(aChar); - result=ConsumeQuotedString(aChar,mTextValue,mNewlineCount, - aScanner,aFlag); - if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { - mTextValue.Insert(aChar,0); - mTextValue.Append(aChar); - } - // According to spec. we ( who? ) should ignore linefeeds. But look, - // even the carriage return was getting stripped ( wonder why! ) - - // Ref. to bug 15204. Okay, so the spec. told us to ignore linefeeds, - // bug then what about bug 47535 ? Should we preserve everything then? - // Well, let's make it so! Commenting out the next two lines.. - /*if(!aRetain) - mTextValue.StripChars("\r\n"); //per the HTML spec, ignore linefeeds... - */ - } - else if (kGreaterThan==aChar){ - mHasEqualWithoutValue=PR_TRUE; - } - else { - static const nsReadEndCondition - theAttributeTerminator(kAttributeTerminalChars); - result=ConsumeAttributeValueText(mTextValue, - mNewlineCount, - aScanner, - theAttributeTerminator, - PR_FALSE, - aFlag); - } - }//if - if (NS_OK==result) { - if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { - result = aScanner.ReadWhitespace(mTextValue, mNewlineCount); - } - else { - result = aScanner.SkipWhitespace(mNewlineCount); - } - } - }//if - }//if - }//if - else { - //This is where we have to handle fairly busted content. - //If you're here, it means we saw an attribute name, but couldn't find - //the following equal sign. <tag NAME=.... - - //Doing this right in all cases is <i>REALLY</i> ugly. - //My best guess is to grab the next non-ws char. We know it's not '=', - //so let's see what it is. If it's a '"', then assume we're reading - //from the middle of the value. 
Try stripping the quote and continuing... - if (kQuote==aChar){ - result=aScanner.SkipOver(aChar); //strip quote. - } - } - }//if - } //if - }//if (consume optional value) - - if (NS_OK==result) { - result=aScanner.Peek(aChar); -#ifdef DEBUG - mLastAttribute = (kGreaterThan == aChar || kEOF == result); -#endif - } - }//if - return result; -} - -void CAttributeToken::SetKey(const nsAString& aKey) -{ - mTextKey.Rebind(aKey); -} - -void CAttributeToken::BindKey(nsScanner* aScanner, - nsScannerIterator& aStart, - nsScannerIterator& aEnd) -{ - aScanner->BindSubstring(mTextKey, aStart, aEnd); -} - -/* - * default constructor - * - * @update gess 3/25/98 - * @param aName -- string to init token name with - * @return - */ -CWhitespaceToken::CWhitespaceToken() : CHTMLToken(eHTMLTag_whitespace) { -} - - -/* - * default constructor - * - * @update gess 3/25/98 - * @param aName -- string value to init token name with - * @return - */ -CWhitespaceToken::CWhitespaceToken(const nsAString& aName) : CHTMLToken(eHTMLTag_whitespace) { - mTextValue.Assign(aName); -} - -/* - * - * - * @update gess 3/25/98 - * @param - * @return - */ -PRInt32 CWhitespaceToken::GetTokenType(void) { - return eToken_whitespace; -} - -/* - * This general purpose method is used when you want to - * consume an aribrary sequence of whitespace. 
- * - * @update gess 3/25/98 - * @param aChar -- last char consumed from stream - * @param aScanner -- controller of underlying input source - * @return error result - */ -nsresult CWhitespaceToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) { - mTextValue.Assign(aChar); - nsresult result=aScanner.ReadWhitespace(mTextValue, mNewlineCount); - if(NS_OK==result) { - mTextValue.StripChar(kCR); - } - return result; -} - -const nsAString& CWhitespaceToken::GetStringValue(void) -{ - return mTextValue; -} - -/* - * default constructor - * - * @update gess 3/25/98 - * @param aName -- string to init token name with - * @return - */ -CEntityToken::CEntityToken() : CHTMLToken(eHTMLTag_entity) { -} - -/* - * default constructor - * - * @update gess 3/25/98 - * @param aName -- string value to init token name with - * @return - */ -CEntityToken::CEntityToken(const nsAString& aName) : CHTMLToken(eHTMLTag_entity) { - mTextValue.Assign(aName); -#ifdef VERBOSE_DEBUG - if(!VerifyEntityTable()) { - cout<<"Entity table is invalid!" << endl; - } -#endif -} - - -/* - * Consume the rest of the entity. We've already eaten the "&". - * - * @update gess 3/25/98 - * @param aChar -- last char consumed from stream - * @param aScanner -- controller of underlying input source - * @return error result - */ -nsresult CEntityToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) { - nsresult result=ConsumeEntity(aChar,mTextValue,aScanner); - return result; -} - -/* - * - * - * @update gess 3/25/98 - * @param - * @return - */ -PRInt32 CEntityToken::GetTokenType(void) { - return eToken_entity; -} - -/* - * This general purpose method is used when you want to - * consume an entity &xxxx;. Keep in mind that entities - * are <i>not</i> reduced inline. 
- * - * @update gess 3/25/98 - * @param aChar -- last char consumed from stream - * @param aScanner -- controller of underlying input source - * @return error result - */ -nsresult -CEntityToken::ConsumeEntity(PRUnichar aChar, - nsString& aString, - nsScanner& aScanner) { - nsresult result=NS_OK; - if(kLeftBrace==aChar) { - //you're consuming a script entity... - aScanner.GetChar(aChar); // Consume & - - PRInt32 rightBraceCount = 0; - PRInt32 leftBraceCount = 0; - - do { - result=aScanner.GetChar(aChar); - - if (NS_FAILED(result)) { - return result; - } - - aString.Append(aChar); - if(aChar==kRightBrace) - ++rightBraceCount; - else if(aChar==kLeftBrace) - ++leftBraceCount; - } while(leftBraceCount!=rightBraceCount); - } //if - else { - PRUnichar theChar=0; - if (kHashsign==aChar) { - result = aScanner.Peek(theChar,2); - - if (NS_FAILED(result)) { - if (kEOF == result && !aScanner.IsIncremental()) { - // If this is the last buffer then we are certainly - // not dealing with an entity. That's, there are - // no more characters after &#. Bug 188278. 
- return NS_HTMLTOKENS_NOT_AN_ENTITY; - } - return result; - } - - if (nsCRT::IsAsciiDigit(theChar)) { - aScanner.GetChar(aChar); // Consume & - aScanner.GetChar(aChar); // Consume # - aString.Assign(aChar); - result=aScanner.ReadNumber(aString,10); - } - else if (theChar == 'x' || theChar == 'X') { - aScanner.GetChar(aChar); // Consume & - aScanner.GetChar(aChar); // Consume # - aScanner.GetChar(theChar); // Consume x - aString.Assign(aChar); - aString.Append(theChar); - result=aScanner.ReadNumber(aString,16); - } - else { - return NS_HTMLTOKENS_NOT_AN_ENTITY; - } - } - else { - result = aScanner.Peek(theChar,1); - - if (NS_FAILED(result)) { - return result; - } - - if(nsCRT::IsAsciiAlpha(theChar) || - theChar == '_' || - theChar == ':') { - aScanner.GetChar(aChar); // Consume & - result=aScanner.ReadEntityIdentifier(aString); - } - else { - return NS_HTMLTOKENS_NOT_AN_ENTITY; - } - } - } - - if (NS_FAILED(result)) { - return result; - } - - result=aScanner.Peek(aChar); - - if (NS_FAILED(result)) { - return result; - } - - if (aChar == kSemicolon) { - // consume semicolon that stopped the scan - aString.Append(aChar); - result=aScanner.GetChar(aChar); - } - - return result; -} - -#define PA_REMAP_128_TO_160_ILLEGAL_NCR 1 - -#ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR -/** - * Map some illegal but commonly used numeric entities into their - * appropriate unicode value. 
- */ -#define NOT_USED 0xfffd - -static const PRUint16 PA_HackTable[] = { - 0x20ac, /* EURO SIGN */ - NOT_USED, - 0x201a, /* SINGLE LOW-9 QUOTATION MARK */ - 0x0192, /* LATIN SMALL LETTER F WITH HOOK */ - 0x201e, /* DOUBLE LOW-9 QUOTATION MARK */ - 0x2026, /* HORIZONTAL ELLIPSIS */ - 0x2020, /* DAGGER */ - 0x2021, /* DOUBLE DAGGER */ - 0x02c6, /* MODIFIER LETTER CIRCUMFLEX ACCENT */ - 0x2030, /* PER MILLE SIGN */ - 0x0160, /* LATIN CAPITAL LETTER S WITH CARON */ - 0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */ - 0x0152, /* LATIN CAPITAL LIGATURE OE */ - NOT_USED, - 0x017D, /* LATIN CAPITAL LETTER Z WITH CARON */ - NOT_USED, - NOT_USED, - 0x2018, /* LEFT SINGLE QUOTATION MARK */ - 0x2019, /* RIGHT SINGLE QUOTATION MARK */ - 0x201c, /* LEFT DOUBLE QUOTATION MARK */ - 0x201d, /* RIGHT DOUBLE QUOTATION MARK */ - 0x2022, /* BULLET */ - 0x2013, /* EN DASH */ - 0x2014, /* EM DASH */ - 0x02dc, /* SMALL TILDE */ - 0x2122, /* TRADE MARK SIGN */ - 0x0161, /* LATIN SMALL LETTER S WITH CARON */ - 0x203a, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */ - 0x0153, /* LATIN SMALL LIGATURE OE */ - NOT_USED, - 0x017E, /* LATIN SMALL LETTER Z WITH CARON */ - 0x0178 /* LATIN CAPITAL LETTER Y WITH DIAERESIS */ -}; -#endif /* PA_REMAP_128_TO_160_ILLEGAL_NCR */ - -static void AppendNCR(nsString& aString, PRInt32 aNCRValue) -{ -#ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR - /* for some illegal, but popular usage */ - if ((aNCRValue >= 0x0080) && (aNCRValue <= 0x009f)) { - aNCRValue = PA_HackTable[aNCRValue - 0x0080]; - } -#endif - - if (IS_IN_BMP(aNCRValue)) - aString.Append(PRUnichar(aNCRValue)); - else { - aString.Append(PRUnichar(H_SURROGATE(aNCRValue))); - aString.Append(PRUnichar(L_SURROGATE(aNCRValue))); - } -} - -/* - * This method converts this entity into its underlying - * unicode equivalent. 
- * - * @update gess 3/25/98 - * @param aString will hold the resulting string value - * @return numeric (unichar) value - */ -PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString) { - PRInt32 value=0; - - if(mTextValue.Length()>1) { - PRUnichar theChar0=mTextValue.CharAt(0); - - if(kHashsign==theChar0) { - PRInt32 err=0; - - value=mTextValue.ToInteger(&err,kAutoDetect); - - if(0==err) { - AppendNCR(aString, value); - } - } - else{ - value = nsHTMLEntities::EntityToUnicode(mTextValue); - if(-1<value) { - //we found a named entity... - aString.Assign(PRUnichar(value)); - } - }//else - }//if - - return value; -} - - -const nsAString& CEntityToken::GetStringValue(void) -{ - return mTextValue; -} - -/* - * - * - * @update gess 3/25/98 - * @param anOutputString will recieve the result - * @return nada - */ -void CEntityToken::GetSource(nsString& anOutputString){ - anOutputString.Append(NS_LITERAL_STRING("&")); - anOutputString+=mTextValue; - //anOutputString+=";"; -} - -/* - * - * - * @update harishd 03/23/00 - * @param result appended to the output string. 
- * @return nada - */ -void CEntityToken::AppendSourceTo(nsAString& anOutputString){ - anOutputString.Append(NS_LITERAL_STRING("&")); - anOutputString+=mTextValue; - //anOutputString+=";"; -} - -/* - * default constructor - * - * @update gess 3/25/98 - * @param aName -- string to init token name with - * @return - */ -CScriptToken::CScriptToken() : CHTMLToken(eHTMLTag_script) { -} - -/* - * default constructor - * - * @update gess 3/25/98 - * @param aName -- string to init token name with - * @return - */ -CScriptToken::CScriptToken(const nsAString& aString) : CHTMLToken(eHTMLTag_script) { - mTextValue.Assign(aString); -} - -/* - * - * - * @update gess 3/25/98 - * @param - * @return - */ -PRInt32 CScriptToken::GetTokenType(void) { - return eToken_script; -} - -const nsAString& CScriptToken::GetStringValue(void) -{ - return mTextValue; -} - -/* - * default constructor - * - * @update gess 3/25/98 - * @param aName -- string to init token name with - * @return - */ -CStyleToken::CStyleToken() : CHTMLToken(eHTMLTag_style) { -} - -CStyleToken::CStyleToken(const nsAString& aString) : CHTMLToken(eHTMLTag_style) { - mTextValue.Assign(aString); -} - -/* - * - * - * @update gess 3/25/98 - * @param - * @return - */ -PRInt32 CStyleToken::GetTokenType(void) { - return eToken_style; -} - -const nsAString& CStyleToken::GetStringValue(void) -{ - return mTextValue; -} - - -/** - * - * @update gess4/25/98 - * @param - * @return - */ -const PRUnichar* GetTagName(PRInt32 aTag) -{ - const PRUnichar *result = nsHTMLTags::GetStringValue((nsHTMLTag) aTag); - - if (result) { - return result; - } - - if(aTag >= eHTMLTag_userdefined) - return sUserdefined; - - return 0; -} - - -/** - * - * - * @update gess 9/23/98 - * @param - * @return - */ -CInstructionToken::CInstructionToken() : CHTMLToken(eHTMLTag_instruction) { -} - -/** - * - * - * @update gess 9/23/98 - * @param - * @return - */ -CInstructionToken::CInstructionToken(const nsAString& aString) : CHTMLToken(eHTMLTag_unknown) { - 
mTextValue.Assign(aString); -} - -/** - * - * - * @update gess 9/23/98 - * @param - * @return - */ -nsresult CInstructionToken::Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aFlag){ - mTextValue.Assign(NS_LITERAL_STRING("<?")); - nsresult result=aScanner.ReadUntil(mTextValue,kGreaterThan,PR_TRUE); - return result; -} - -/** - * - * - * @update gess 9/23/98 - * @param - * @return - */ -PRInt32 CInstructionToken::GetTokenType(void){ - return eToken_instruction; -} - -const nsAString& CInstructionToken::GetStringValue(void) -{ - return mTextValue; -} - - -CErrorToken::CErrorToken(nsParserError *aError) : CHTMLToken(eHTMLTag_unknown) -{ - mError = aError; -} - -CErrorToken::~CErrorToken() -{ - delete mError; -} - -PRInt32 CErrorToken::GetTokenType(void){ - return eToken_error; -} - -void CErrorToken::SetError(nsParserError *aError) { - mError = aError; -} - -const nsParserError * CErrorToken::GetError(void) -{ - return mError; -} - -const nsAString& CErrorToken::GetStringValue(void) -{ - return mTextValue; -} - -// Doctype decl token - -CDoctypeDeclToken::CDoctypeDeclToken(eHTMLTags aTag) - : CHTMLToken(aTag) { -} - -CDoctypeDeclToken::CDoctypeDeclToken(const nsAString& aString,eHTMLTags aTag) - : CHTMLToken(aTag), mTextValue(aString) { -} - -/** - * This method consumes a doctype element. - * Note: I'm rewriting this method to seek to the first <, since quotes can really screw us up. 
- * - * @update gess 9/23/98 - * @param - * @return - */ -nsresult CDoctypeDeclToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) { - - static const PRUnichar terminalChars[] = - { PRUnichar('>'), PRUnichar('<'), - PRUnichar(0) - }; - static const nsReadEndCondition theEndCondition(terminalChars); - - nsScannerIterator start, end; - - aScanner.CurrentPosition(start); - aScanner.EndReading(end); - - nsresult result=aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE); - - if (NS_SUCCEEDED(result)) { - PRUnichar ch; - aScanner.Peek(ch); - if (ch == kGreaterThan) { - // Include '>' but not '<' since '<' - // could belong to another tag. - aScanner.GetChar(ch); - end.advance(1); - } - } - else if (!aScanner.IsIncremental()) { - // We have reached the document end but haven't - // found either a '<' or a '>'. Therefore use - // whatever we have. - result = NS_OK; - } - - if (NS_SUCCEEDED(result)) { - start.advance(-2); // Make sure to consume <! - CopyUnicodeTo(start,end,mTextValue); - } - - return result; -} - -PRInt32 CDoctypeDeclToken::GetTokenType(void) { - return eToken_doctypeDecl; -} - -const nsAString& CDoctypeDeclToken::GetStringValue(void) -{ - return mTextValue; -} - -void CDoctypeDeclToken::SetStringValue(const nsAString& aStr) -{ - mTextValue.Assign(aStr); -} diff --git a/htmlparser/src/nsLoggingSink.cpp b/htmlparser/src/nsLoggingSink.cpp deleted file mode 100644 index 52c25b57ca0e..000000000000 --- a/htmlparser/src/nsLoggingSink.cpp +++ /dev/null @@ -1,811 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ -#include "nsIAtom.h" -#include "nsLoggingSink.h" -#include "nsHTMLTags.h" -#include "nsString.h" -#include "nsReadableUtils.h" -#include "prprf.h" - - -static NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID); -static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID); -static NS_DEFINE_IID(kILoggingSinkIID, NS_ILOGGING_SINK_IID); -static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); - -// list of tags that have skipped content -static const char gSkippedContentTags[] = { - eHTMLTag_style, - eHTMLTag_script, - eHTMLTag_server, - eHTMLTag_textarea, - eHTMLTag_title, - 0 -}; - - -nsresult -NS_NewHTMLLoggingSink(nsIContentSink** aInstancePtrResult) -{ - NS_PRECONDITION(nsnull != aInstancePtrResult, "null ptr"); - if (nsnull == aInstancePtrResult) { - return NS_ERROR_NULL_POINTER; - } - nsLoggingSink* it = new nsLoggingSink(); - if (nsnull == it) { - return NS_ERROR_OUT_OF_MEMORY; - } - return it->QueryInterface(kIContentSinkIID, (void**) aInstancePtrResult); -} - -nsLoggingSink::nsLoggingSink() { - mOutput = 0; - mLevel=-1; - mSink=0; - mParser=0; -} - -nsLoggingSink::~nsLoggingSink() { - mSink=0; - if(mOutput && mAutoDeleteOutput) { - delete mOutput; - } - mOutput=0; -} - -NS_IMPL_ADDREF(nsLoggingSink) -NS_IMPL_RELEASE(nsLoggingSink) - -nsresult -nsLoggingSink::QueryInterface(const nsIID& aIID, void** aInstancePtr) -{ - NS_PRECONDITION(nsnull != aInstancePtr, "null ptr"); - if (nsnull == aInstancePtr) { - return NS_ERROR_NULL_POINTER; - } - if (aIID.Equals(kISupportsIID)) { - nsISupports* tmp = this; - *aInstancePtr = (void*) tmp; - } - else if (aIID.Equals(kIContentSinkIID)) { - nsIContentSink* tmp = this; - *aInstancePtr = (void*) tmp; - } - else if (aIID.Equals(kIHTMLContentSinkIID)) { - nsIHTMLContentSink* tmp = this; - *aInstancePtr = (void*) tmp; - } - else if (aIID.Equals(kILoggingSinkIID)) { - nsILoggingSink* tmp = this; - *aInstancePtr = (void*) tmp; - } - else { - *aInstancePtr = nsnull; - return 
NS_NOINTERFACE; - } - NS_ADDREF(this); - return NS_OK; -} - -NS_IMETHODIMP -nsLoggingSink::SetOutputStream(PRFileDesc *aStream,PRBool autoDeleteOutput) { - mOutput = aStream; - mAutoDeleteOutput=autoDeleteOutput; - return NS_OK; -} - -static -void WriteTabs(PRFileDesc * out,int aTabCount) { - int tabs; - for(tabs=0;tabs<aTabCount;++tabs) - PR_fprintf(out, " "); -} - - -NS_IMETHODIMP -nsLoggingSink::WillBuildModel() { - - WriteTabs(mOutput,++mLevel); - PR_fprintf(mOutput, "<begin>\n"); - - //proxy the call to the real sink if you have one. - if(mSink) { - mSink->WillBuildModel(); - } - - return NS_OK; -} - -NS_IMETHODIMP -nsLoggingSink::DidBuildModel() { - - WriteTabs(mOutput,--mLevel); - PR_fprintf(mOutput, "</begin>\n"); - - //proxy the call to the real sink if you have one. - nsresult theResult=NS_OK; - if(mSink) { - theResult=mSink->DidBuildModel(); - } - - return theResult; -} - -NS_IMETHODIMP -nsLoggingSink::WillInterrupt() { - nsresult theResult=NS_OK; - - //proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->WillInterrupt(); - } - - return theResult; -} - -NS_IMETHODIMP -nsLoggingSink::WillResume() { - nsresult theResult=NS_OK; - - //proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->WillResume(); - } - - return theResult; -} - -NS_IMETHODIMP -nsLoggingSink::SetParser(nsIParser* aParser) { - nsresult theResult=NS_OK; - - //proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->SetParser(aParser); - } - - NS_IF_RELEASE(mParser); - - mParser = aParser; - - NS_IF_ADDREF(mParser); - - return theResult; -} - -NS_IMETHODIMP -nsLoggingSink::OpenContainer(const nsIParserNode& aNode) { - - OpenNode("container", aNode); //do the real logging work... - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. 
- if(mSink) { - theResult=mSink->OpenContainer(aNode); - } - - return theResult; - -} - -NS_IMETHODIMP -nsLoggingSink::CloseContainer(const nsHTMLTag aTag) { - - nsresult theResult=NS_OK; - - nsHTMLTag nodeType = nsHTMLTag(aTag); - if ((nodeType >= eHTMLTag_unknown) && - (nodeType <= nsHTMLTag(NS_HTML_TAG_MAX))) { - const PRUnichar* tag = nsHTMLTags::GetStringValue(nodeType); - theResult = CloseNode(NS_ConvertUCS2toUTF8(tag).get()); - } - else theResult= CloseNode("???"); - - //then proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->CloseContainer(aTag); - } - - return theResult; - -} - -NS_IMETHODIMP -nsLoggingSink::AddHeadContent(const nsIParserNode& aNode) { - LeafNode(aNode); - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->AddHeadContent(aNode); - } - - return theResult; - -} - -NS_IMETHODIMP -nsLoggingSink::AddLeaf(const nsIParserNode& aNode) { - LeafNode(aNode); - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->AddLeaf(aNode); - } - - return theResult; - -} - -/** - * This gets called by the parser when you want to add - * a PI node to the current container in the content - * model. - * - * @updated gess 3/25/98 - * @param - * @return - */ -NS_IMETHODIMP -nsLoggingSink::AddProcessingInstruction(const nsIParserNode& aNode){ - -#ifdef VERBOSE_DEBUG - DebugDump("<",aNode.GetText(),(mNodeStackPos)*2); -#endif - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->AddProcessingInstruction(aNode); - } - - return theResult; -} - -/** - * This gets called by the parser when it encounters - * a DOCTYPE declaration in the HTML document. 
- */ - -NS_IMETHODIMP -nsLoggingSink::AddDocTypeDecl(const nsIParserNode& aNode) { - -#ifdef VERBOSE_DEBUG - DebugDump("<",aNode.GetText(),(mNodeStackPos)*2); -#endif - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->AddDocTypeDecl(aNode); - } - - return theResult; - -} - -/** - * This gets called by the parser when you want to add - * a comment node to the current container in the content - * model. - * - * @updated gess 3/25/98 - * @param - * @return - */ -NS_IMETHODIMP -nsLoggingSink::AddComment(const nsIParserNode& aNode){ - -#ifdef VERBOSE_DEBUG - DebugDump("<",aNode.GetText(),(mNodeStackPos)*2); -#endif - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->AddComment(aNode); - } - - return theResult; - -} - - -NS_IMETHODIMP -nsLoggingSink::SetTitle(const nsString& aValue) { - - char* tmp = nsnull; - GetNewCString(aValue, &tmp); - WriteTabs(mOutput,++mLevel); - if(tmp) { - PR_fprintf(mOutput, "<title value=\"%s\"/>\n", tmp); - nsMemory::Free(tmp); - } - --mLevel; - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->SetTitle(aValue); - } - - return theResult; - -} - - -NS_IMETHODIMP -nsLoggingSink::OpenHTML(const nsIParserNode& aNode) { - OpenNode("html", aNode); - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->OpenHTML(aNode); - } - - return theResult; - -} - -NS_IMETHODIMP -nsLoggingSink::CloseHTML() { - CloseNode("html"); - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->CloseHTML(); - } - - return theResult; - -} - -NS_IMETHODIMP -nsLoggingSink::OpenHead(const nsIParserNode& aNode) { - OpenNode("head", aNode); - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. 
- if(mSink) { - theResult=mSink->OpenHead(aNode); - } - - return theResult; -} - -NS_IMETHODIMP -nsLoggingSink::CloseHead() { - CloseNode("head"); - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->CloseHead(); - } - - return theResult; -} - -NS_IMETHODIMP -nsLoggingSink::OpenBody(const nsIParserNode& aNode) { - OpenNode("body", aNode); - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->OpenBody(aNode); - } - - return theResult; -} - -NS_IMETHODIMP -nsLoggingSink::CloseBody() { - CloseNode("body"); - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->CloseBody(); - } - - return theResult; -} - -NS_IMETHODIMP -nsLoggingSink::OpenForm(const nsIParserNode& aNode) { - OpenNode("form", aNode); - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->OpenForm(aNode); - } - - return theResult; -} - -NS_IMETHODIMP -nsLoggingSink::CloseForm() { - CloseNode("form"); - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->CloseForm(); - } - - return theResult; -} - -NS_IMETHODIMP -nsLoggingSink::OpenMap(const nsIParserNode& aNode) { - OpenNode("map", aNode); - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->OpenMap(aNode); - } - - return theResult; -} - -NS_IMETHODIMP -nsLoggingSink::CloseMap() { - CloseNode("map"); - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. 
- if(mSink) { - theResult=mSink->CloseMap(); - } - - return theResult; -} - -NS_IMETHODIMP -nsLoggingSink::OpenFrameset(const nsIParserNode& aNode) { - OpenNode("frameset", aNode); - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->OpenFrameset(aNode); - } - - return theResult; -} - -NS_IMETHODIMP -nsLoggingSink::CloseFrameset() { - CloseNode("frameset"); - - nsresult theResult=NS_OK; - - //then proxy the call to the real sink if you have one. - if(mSink) { - theResult=mSink->CloseFrameset(); - } - - return theResult; -} - - -nsresult -nsLoggingSink::OpenNode(const char* aKind, const nsIParserNode& aNode) { - WriteTabs(mOutput,++mLevel); - - PR_fprintf(mOutput,"<open container="); - - nsHTMLTag nodeType = nsHTMLTag(aNode.GetNodeType()); - if ((nodeType >= eHTMLTag_unknown) && - (nodeType <= nsHTMLTag(NS_HTML_TAG_MAX))) { - const PRUnichar* tag = nsHTMLTags::GetStringValue(nodeType); - PR_fprintf(mOutput, "\"%s\"", NS_ConvertUCS2toUTF8(tag).get()); - } - else { - char* text; - GetNewCString(aNode.GetText(), &text); - if(text) { - PR_fprintf(mOutput, "\"%s\"", text); - nsMemory::Free(text); - } - } - - if (WillWriteAttributes(aNode)) { - PR_fprintf(mOutput, ">\n"); - WriteAttributes(aNode); - PR_fprintf(mOutput, "</open>\n"); - } - else { - PR_fprintf(mOutput, ">\n"); - } - - return NS_OK; -} - -nsresult -nsLoggingSink::CloseNode(const char* aKind) { - WriteTabs(mOutput,mLevel--); - PR_fprintf(mOutput, "<close container=\"%s\">\n", aKind); - return NS_OK; -} - - -nsresult -nsLoggingSink::WriteAttributes(const nsIParserNode& aNode) { - - WriteTabs(mOutput,1+mLevel); - nsAutoString tmp; - PRInt32 ac = aNode.GetAttributeCount(); - for (PRInt32 i = 0; i < ac; ++i) { - char* key=nsnull; - char* value=nsnull; - const nsAString& k = aNode.GetKeyAt(i); - const nsAString& v = aNode.GetValueAt(i); - - GetNewCString(k, &key); - if(key) { - PR_fprintf(mOutput, " <attr key=\"%s\" value=\"", key); - 
nsMemory::Free(key); - } - - tmp.Truncate(); - tmp.Append(v); - if(!tmp.IsEmpty()) { - PRUnichar first = tmp.First(); - if ((first == '"') || (first == '\'')) { - if (tmp.Last() == first) { - tmp.Cut(0, 1); - PRInt32 pos = tmp.Length() - 1; - if (pos >= 0) { - tmp.Cut(pos, 1); - } - } else { - // Mismatched quotes - leave them in - } - } - GetNewCString(tmp, &value); - - if(value) { - PR_fprintf(mOutput, "%s\"/>\n", value); - WriteTabs(mOutput,1+mLevel); - nsMemory::Free(value); - } - } - } - - if (0 != strchr(gSkippedContentTags, aNode.GetNodeType())) { - nsCOMPtr<nsIDTD> dtd; - mParser->GetDTD(getter_AddRefs(dtd)); - NS_ENSURE_TRUE(dtd, NS_ERROR_FAILURE); - - nsString theString; - PRInt32 lineNo = 0; - - dtd->CollectSkippedContent(aNode.GetNodeType(), theString, lineNo); - char* content = nsnull; - GetNewCString(theString, &content); - if(content) { - PR_fprintf(mOutput, " <content value=\""); - PR_fprintf(mOutput, "%s\"/>\n", content) ; - nsMemory::Free(content); - } - } - WriteTabs(mOutput,1+mLevel); - return NS_OK; -} - -PRBool -nsLoggingSink::WillWriteAttributes(const nsIParserNode& aNode) -{ - PRInt32 ac = aNode.GetAttributeCount(); - if (0 != ac) { - return PR_TRUE; - } - if (0 != strchr(gSkippedContentTags, aNode.GetNodeType())) { - nsCOMPtr<nsIDTD> dtd; - mParser->GetDTD(getter_AddRefs(dtd)); - NS_ENSURE_TRUE(dtd, NS_ERROR_FAILURE); - - nsString content; - PRInt32 lineNo = 0; - - dtd->CollectSkippedContent(aNode.GetNodeType(), content, lineNo); - if (!content.IsEmpty()) { - return PR_TRUE; - } - } - return PR_FALSE; -} - -nsresult -nsLoggingSink::LeafNode(const nsIParserNode& aNode) -{ - WriteTabs(mOutput,1+mLevel); - nsHTMLTag nodeType = nsHTMLTag(aNode.GetNodeType()); - - if ((nodeType >= eHTMLTag_unknown) && - (nodeType <= nsHTMLTag(NS_HTML_TAG_MAX))) { - const PRUnichar* tag = nsHTMLTags::GetStringValue(nodeType); - - if(tag) - PR_fprintf(mOutput, "<leaf tag=\"%s\"", NS_ConvertUCS2toUTF8(tag).get()); - else PR_fprintf(mOutput, "<leaf tag=\"???\""); - 
- if (WillWriteAttributes(aNode)) { - PR_fprintf(mOutput, ">\n"); - WriteAttributes(aNode); - PR_fprintf(mOutput, "</leaf>\n"); - } - else { - PR_fprintf(mOutput, "/>\n"); - } - } - else { - PRInt32 pos; - nsAutoString tmp; - char* str; - switch (nodeType) { - case eHTMLTag_whitespace: - case eHTMLTag_text: - GetNewCString(aNode.GetText(), &str); - if(str) { - PR_fprintf(mOutput, "<text value=\"%s\"/>\n", str); - nsMemory::Free(str); - } - break; - - case eHTMLTag_newline: - PR_fprintf(mOutput, "<newline/>\n"); - break; - - case eHTMLTag_entity: - tmp.Append(aNode.GetText()); - tmp.Cut(0, 1); - pos = tmp.Length() - 1; - if (pos >= 0) { - tmp.Cut(pos, 1); - } - PR_fprintf(mOutput, "<entity value=\"%s\"/>\n", NS_LossyConvertUCS2toASCII(tmp).get()); - break; - - default: - NS_NOTREACHED("unsupported leaf node type"); - }//switch - } - return NS_OK; -} - -nsresult -nsLoggingSink::QuoteText(const nsAString& aValue, nsString& aResult) { - aResult.Truncate(); - /* - if you're stepping through the string anyway, why not use iterators instead of forcing the string to copy? - */ - const nsPromiseFlatString& flat = PromiseFlatString(aValue); - const PRUnichar* cp = flat.get(); - const PRUnichar* end = cp + aValue.Length(); - while (cp < end) { - PRUnichar ch = *cp++; - if (ch == '"') { - aResult.Append(NS_LITERAL_STRING(""")); - } - else if (ch == '&') { - aResult.Append(NS_LITERAL_STRING("&")); - } - else if ((ch < 32) || (ch >= 127)) { - aResult.Append(NS_LITERAL_STRING("&#")); - aResult.AppendInt(PRInt32(ch), 10); - aResult.Append(PRUnichar(';')); - } - else { - aResult.Append(ch); - } - } - return NS_OK; -} - -/** - * Use this method to convert nsString to char*. - * REMEMBER: Match this call with nsMemory::Free(aResult); - * - * @update 04/04/99 harishd - * @param aValue - The string value - * @param aResult - String coverted to char*. 
- */ -nsresult -nsLoggingSink::GetNewCString(const nsAString& aValue, char** aResult) -{ - nsresult result=NS_OK; - nsAutoString temp; - result=QuoteText(aValue,temp); - if(NS_SUCCEEDED(result)) { - if(!temp.IsEmpty()) { - *aResult = ToNewCString(temp); - } - } - return result; -} - -/** - * This gets called when handling illegal contents, especially - * in dealing with tables. This method creates a new context. - * - * @update 04/04/99 harishd - * @param aPosition - The position from where the new context begins. - */ -NS_IMETHODIMP -nsLoggingSink::BeginContext(PRInt32 aPosition) -{ - return NS_OK; -} - -/** - * This method terminates any new context that got created by - * BeginContext and switches back to the main context. - * - * @update 04/04/99 harishd - * @param aPosition - Validates the end of a context. - */ -NS_IMETHODIMP -nsLoggingSink::EndContext(PRInt32 aPosition) -{ - return NS_OK; -} diff --git a/htmlparser/src/nsLoggingSink.h b/htmlparser/src/nsLoggingSink.h deleted file mode 100644 index 80f0f05c1791..000000000000 --- a/htmlparser/src/nsLoggingSink.h +++ /dev/null @@ -1,123 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. 
- * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef NS_LOGGING_SINK_H__ -#define NS_LOGGING_SINK_H__ - -#include "nsILoggingSink.h" -#include "nsIParser.h" - -class nsLoggingSink : public nsILoggingSink { -public: - nsLoggingSink(); - virtual ~nsLoggingSink(); - - void SetProxySink(nsIHTMLContentSink *aSink) { - mSink=aSink; - } - - void ReleaseProxySink() { - NS_IF_RELEASE(mSink); - mSink=0; - } - - - // nsISupports - NS_DECL_ISUPPORTS - - // nsIContentSink - NS_IMETHOD WillBuildModel(); - NS_IMETHOD DidBuildModel(); - NS_IMETHOD WillInterrupt(); - NS_IMETHOD WillResume(); - NS_IMETHOD SetParser(nsIParser* aParser); - NS_IMETHOD OpenContainer(const nsIParserNode& aNode); - NS_IMETHOD CloseContainer(const nsHTMLTag aTag); - NS_IMETHOD AddHeadContent(const nsIParserNode& aNode); - NS_IMETHOD AddLeaf(const nsIParserNode& aNode); - NS_IMETHOD AddComment(const nsIParserNode& aNode); - NS_IMETHOD AddProcessingInstruction(const nsIParserNode& aNode); - NS_IMETHOD AddDocTypeDecl(const nsIParserNode& aNode); 
- NS_IMETHOD FlushPendingNotifications() { return NS_OK; } - NS_IMETHOD SetDocumentCharset(nsACString& aCharset) { return NS_OK; } - NS_IMETHOD NotifyTagObservers(nsIParserNode* aNode) { return NS_OK; } - - // nsIHTMLContentSink - NS_IMETHOD SetTitle(const nsString& aValue); - NS_IMETHOD OpenHTML(const nsIParserNode& aNode); - NS_IMETHOD CloseHTML(); - NS_IMETHOD OpenHead(const nsIParserNode& aNode); - NS_IMETHOD CloseHead(); - NS_IMETHOD OpenBody(const nsIParserNode& aNode); - NS_IMETHOD CloseBody(); - NS_IMETHOD OpenForm(const nsIParserNode& aNode); - NS_IMETHOD CloseForm(); - NS_IMETHOD OpenMap(const nsIParserNode& aNode); - NS_IMETHOD CloseMap(); - NS_IMETHOD OpenFrameset(const nsIParserNode& aNode); - NS_IMETHOD CloseFrameset(); - NS_IMETHOD IsEnabled(PRInt32 aTag, PRBool* aReturn) { return NS_OK; } - NS_IMETHOD_(PRBool) IsFormOnStack() { return PR_FALSE; } - - NS_IMETHOD BeginContext(PRInt32 aPosition); - NS_IMETHOD EndContext(PRInt32 aPosition); - NS_IMETHOD WillProcessTokens(void) { return NS_OK; } - NS_IMETHOD DidProcessTokens(void) { return NS_OK; } - NS_IMETHOD WillProcessAToken(void) { return NS_OK; } - NS_IMETHOD DidProcessAToken(void) { return NS_OK; } - - // nsILoggingSink - NS_IMETHOD SetOutputStream(PRFileDesc *aStream,PRBool autoDelete=PR_FALSE); - - nsresult OpenNode(const char* aKind, const nsIParserNode& aNode); - nsresult CloseNode(const char* aKind); - nsresult LeafNode(const nsIParserNode& aNode); - nsresult WriteAttributes(const nsIParserNode& aNode); - nsresult QuoteText(const nsAString& aValue, nsString& aResult); - nsresult GetNewCString(const nsAString& aValue, char** aResult); - PRBool WillWriteAttributes(const nsIParserNode& aNode); - -protected: - PRFileDesc *mOutput; - int mLevel; - nsIHTMLContentSink *mSink; - PRBool mAutoDeleteOutput; - nsIParser* mParser; -}; - -#endif - diff --git a/htmlparser/src/nsParser.cpp b/htmlparser/src/nsParser.cpp deleted file mode 100644 index 16c9b56f545b..000000000000 --- 
a/htmlparser/src/nsParser.cpp +++ /dev/null @@ -1,2651 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Pierre Phaneuf <pp@ludusdesign.com> - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -#define XMLENCODING_PEEKBYTES 64 -#define DISABLE_TRANSITIONAL_MODE - - - -#include "nsIAtom.h" -#include "nsParser.h" -#include "nsString.h" -#include "nsCRT.h" -#include "nsScanner.h" -#include "plstr.h" -#include "nsIStringStream.h" -#include "nsIChannel.h" -#include "nsICachingChannel.h" -#include "nsICacheEntryDescriptor.h" -#include "nsICharsetAlias.h" -#include "nsIInputStream.h" -#include "CNavDTD.h" -#include "COtherDTD.h" -#include "prenv.h" -#include "nsParserCIID.h" -#include "nsReadableUtils.h" -#include "nsCOMPtr.h" -#include "nsIEventQueue.h" -#include "nsIEventQueueService.h" -#include "nsExpatDriver.h" -#include "nsIServiceManager.h" -//#define rickgdebug - -#ifdef MOZ_VIEW_SOURCE -#include "nsViewSourceHTML.h" -#endif - -#define NS_PARSER_FLAG_DTD_VERIFICATION 0x00000001 -#define NS_PARSER_FLAG_PARSER_ENABLED 0x00000002 -#define NS_PARSER_FLAG_OBSERVERS_ENABLED 0x00000004 -#define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008 -#define NS_PARSER_FLAG_CAN_INTERRUPT 0x00000010 -#define NS_PARSER_FLAG_FLUSH_TOKENS 0x00000020 -#define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000040 - -static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); -static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID); -static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID); - -static NS_DEFINE_CID(kEventQueueServiceCID, NS_EVENTQUEUESERVICE_CID); - -//------------------------------------------------------------------- - - -class CDTDDeallocator: public nsDequeFunctor{ -public: - virtual void* operator()(void* anObject) { - nsIDTD* aDTD =(nsIDTD*)anObject; - NS_RELEASE(aDTD); - return 0; - } -}; - -//------------------------------------------------------------------- - -class CDTDFinder: public nsDequeFunctor{ -public: - CDTDFinder(nsIDTD* aDTD) { - mTargetDTD=aDTD; - } - virtual ~CDTDFinder() { - } - virtual void* operator()(void* anObject) { - nsIDTD* theDTD=(nsIDTD*)anObject; - 
if(theDTD->GetMostDerivedIID().Equals(mTargetDTD->GetMostDerivedIID())) - return anObject; - return 0; - } - nsIDTD* mTargetDTD; -}; - -//------------------------------------------------------------------- - -class CSharedParserObjects { -public: - - CSharedParserObjects() - :mDTDDeque(0), - mHasViewSourceDTD(PR_FALSE), - mHasXMLDTD(PR_FALSE) - { - // do nothing. - } - - ~CSharedParserObjects() { - CDTDDeallocator theDeallocator; - mDTDDeque.ForEach(theDeallocator); //release all the DTD's - } - - nsresult Init() { - //Note: To cut down on startup time/overhead, we defer the construction of non-html DTD's. - nsIDTD* theDTD = 0; - nsresult rv = NS_NewNavHTMLDTD(&theDTD); //do this as a default HTML DTD... - - NS_ASSERTION(theDTD, "Failed to create DTD"); - NS_ENSURE_SUCCESS(rv, rv); - - mDTDDeque.Push(theDTD); - mHasViewSourceDTD = PR_FALSE; - mHasXMLDTD = PR_FALSE; - return NS_OK; - } - - nsresult RegisterDTD(nsIDTD* aDTD) { - NS_ENSURE_ARG_POINTER(aDTD); - nsCOMPtr<nsIDTD> dtd(aDTD); - CDTDFinder theFinder(dtd); - if (!mDTDDeque.FirstThat(theFinder)) { - nsIDTD* theDTD; - nsresult rv = dtd->CreateNewInstance(&theDTD); - NS_ENSURE_SUCCESS(rv, rv); - mDTDDeque.Push(theDTD); - } - return NS_OK; - } - - nsDeque mDTDDeque; - PRBool mHasViewSourceDTD; //this allows us to defer construction of this object. - PRBool mHasXMLDTD; //also defer XML dtd construction -}; - - -//-------------- Begin ParseContinue Event Definition ------------------------ -/* -The parser can be explicitly interrupted by passing a return value of NS_ERROR_HTMLPARSER_INTERRUPTED -from BuildModel on the DTD. This will cause the parser to stop processing and allow -the application to return to the event loop. The data which was left at the time of -interruption will be processed the next time OnDataAvailable is called. 
If the parser -has received its final chunk of data then OnDataAvailable will no longer be called by the -networking module, so the parser will schedule a nsParserContinueEvent which will call -the parser to process the remaining data after returning to the event loop. If the parser -is interrupted while processing the remaining data it will schedule another -ParseContinueEvent. The processing of data followed by scheduling of the continue events -will proceed until either: - - 1) All of the remaining data can be processed without interrupting - 2) The parser has been cancelled. - - -This capability is currently used in CNavDTD and nsHTMLContentSink. The nsHTMLContentSink is -notified by CNavDTD when a chunk of tokens is going to be processed and when each token -is processed. The nsHTML content sink records the time when the chunk has started -processing and will return NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time -has exceeded a threshold called max tokenizing processing time. This allows the content -sink to limit how much data is processed in a single chunk which in turn gates how much -time is spent away from the event loop. Processing smaller chunks of data also reduces -the time spent in subsequent reflows. - -This capability is most apparent when loading large documents. If the maximum token -processing time is set small enough the application will remain responsive during -document load. - -A side-effect of this capability is that document load is not complete when the last chunk -of data is passed to OnDataAvailable since the parser may have been interrupted when -the last chunk of data arrived. The document is complete when all of the document has -been tokenized and there aren't any pending nsParserContinueEvents. This can cause -problems if the application assumes that it can monitor the load requests to determine -when the document load has been completed. This is what happens in Mozilla. 
The document -is considered completely loaded when all of the load requests have been satisfied. To delay the -document load until all of the parsing has been completed the nsHTMLContentSink adds a -dummy parser load request which is not removed until the nsHTMLContentSink's DidBuildModel -is called. The CNavDTD will not call DidBuildModel until the final chunk of data has been -passed to the parser through the OnDataAvailable and there aren't any pending -nsParserContineEvents. - -Currently the parser is ignores requests to be interrupted during the processing of script. -This is because a document.write followed by JavaScript calls to manipulate the DOM may -fail if the parser was interrupted during the document.write. - - -For more details @see bugzilla bug 76722 -*/ - - -struct nsParserContinueEvent : public PLEvent { - - nsParserContinueEvent(nsParser* aParser) - { - NS_ADDREF(aParser); - PL_InitEvent(this, aParser, HandleEvent, DestroyEvent); - } - - ~nsParserContinueEvent() - { - nsParser *parser = (nsParser*) owner; - NS_RELEASE(parser); - } - - PR_STATIC_CALLBACK(void*) HandleEvent(PLEvent* aEvent) - { - nsParser *parser = (nsParser*) aEvent->owner; - parser->HandleParserContinueEvent(); - return nsnull; - } - - PR_STATIC_CALLBACK(void) DestroyEvent(PLEvent* aEvent) - { - delete (nsParserContinueEvent*) aEvent; - } -}; - -//-------------- End ParseContinue Event Definition ------------------------ - - -static CSharedParserObjects* gSharedParserObjects=0; - - -//------------------------------------------------------------------------- - -nsresult -GetSharedObjects(CSharedParserObjects** aSharedParserObjects) { - if (!gSharedParserObjects) { - gSharedParserObjects = new CSharedParserObjects(); - NS_ENSURE_TRUE(gSharedParserObjects, NS_ERROR_OUT_OF_MEMORY); - nsresult rv = gSharedParserObjects->Init(); - NS_ENSURE_SUCCESS(rv, rv); - } - *aSharedParserObjects = gSharedParserObjects; - return NS_OK; -} - -/** - * This gets called when the htmlparser module is 
shutdown. - * - * @update gess 01/04/99 - */ -void nsParser::FreeSharedObjects(void) { - if (gSharedParserObjects) { - delete gSharedParserObjects; - gSharedParserObjects=0; - } -} - - -#ifdef DEBUG -static PRBool gDumpContent=PR_FALSE; -#endif - -/** - * default constructor - * - * @update gess 01/04/99 - * @param - * @return - */ -nsParser::nsParser() { -#ifdef NS_DEBUG - if(!gDumpContent) { - gDumpContent=(PR_GetEnv("PARSER_DUMP_CONTENT"))? PR_TRUE:PR_FALSE; - } -#endif - - mCharset.Assign(NS_LITERAL_CSTRING("ISO-8859-1")); - mParserContext=0; - mStreamStatus=0; - mCharsetSource=kCharsetUninitialized; - mInternalState=NS_OK;; - mCommand=eViewNormal; - mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED | NS_PARSER_FLAG_PARSER_ENABLED | NS_PARSER_FLAG_CAN_TOKENIZE; - - MOZ_TIMER_DEBUGLOG(("Reset: Parse Time: nsParser::nsParser(), this=%p\n", this)); - MOZ_TIMER_RESET(mParseTime); - MOZ_TIMER_RESET(mDTDTime); - MOZ_TIMER_RESET(mTokenizeTime); - - nsresult rv = NS_OK; - if (mEventQueue == nsnull) { - // Cache the event queue of the current UI thread - nsCOMPtr<nsIEventQueueService> eventService = - do_GetService(kEventQueueServiceCID, &rv); - if (NS_SUCCEEDED(rv) && (eventService)) { // XXX this implies that the UI is the current thread. - rv = eventService->GetThreadEventQueue(NS_CURRENT_THREAD, getter_AddRefs(mEventQueue)); - } - - // NS_ASSERTION(mEventQueue, "event queue is null"); - } -} - -/** - * Default destructor - * - * @update gess 01/04/99 - * @param - * @return - */ -nsParser::~nsParser() { - -#ifdef NS_DEBUG - if(gDumpContent) { - if(mSink) { - // Sink ( HTMLContentSink at this time) supports nsIDebugDumpContent - // interface. We can get to the content model through the sink. - nsresult result=NS_OK; - nsCOMPtr<nsIDebugDumpContent> trigger=do_QueryInterface(mSink,&result); - if(NS_SUCCEEDED(result)) { - trigger->DumpContentModel(); - } - } - } -#endif - - //don't forget to add code here to delete - //what may be several contexts... 
- delete mParserContext; - - if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) { - NS_ASSERTION(mEventQueue != nsnull,"Event queue is null"); - mEventQueue->RevokeEvents(this); - } -} - - -NS_IMPL_ADDREF(nsParser) -NS_IMPL_RELEASE(nsParser) - - -/** - * This method gets called as part of our COM-like interfaces. - * Its purpose is to create an interface to parser object - * of some type. - * - * @update gess 01/04/99 - * @param nsIID id of object to discover - * @param aInstancePtr ptr to newly discovered interface - * @return NS_xxx result code - */ -nsresult nsParser::QueryInterface(const nsIID& aIID, void** aInstancePtr) -{ - if (NULL == aInstancePtr) { - return NS_ERROR_NULL_POINTER; - } - - if(aIID.Equals(kISupportsIID)) { //do IUnknown... - *aInstancePtr = (nsIParser*)(this); - } - else if(aIID.Equals(kIParserIID)) { //do IParser base class... - *aInstancePtr = (nsIParser*)(this); - } - else if(aIID.Equals(NS_GET_IID(nsIRequestObserver))) { - *aInstancePtr = (nsIRequestObserver*)(this); - } - else if(aIID.Equals(NS_GET_IID(nsIStreamListener))) { - *aInstancePtr = (nsIStreamListener*)(this); - } - else if(aIID.Equals(kCParserCID)) { //do this class... - *aInstancePtr = (nsParser*)(this); - } - else { - *aInstancePtr=0; - return NS_NOINTERFACE; - } - NS_ADDREF_THIS(); - return NS_OK; -} - -// The parser continue event is posted only if -// all of the data to parse has been passed to ::OnDataAvailable -// and the parser has been interrupted by the content sink -// because the processing of tokens took too long. 
- -nsresult -nsParser::PostContinueEvent() -{ - if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) && mEventQueue) { - nsParserContinueEvent* ev = new nsParserContinueEvent(this); - NS_ENSURE_TRUE(ev, NS_ERROR_OUT_OF_MEMORY); - if (NS_FAILED(mEventQueue->PostEvent(ev))) { - NS_ERROR("failed to post parser continuation event"); - PL_DestroyEvent(ev); - } - else - mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; - } - return NS_OK; -} - - -/** - * - * @update gess 01/04/99 - * @param - * @return - */ -NS_IMETHODIMP_(void) nsParser::SetParserFilter(nsIParserFilter * aFilter) -{ - mParserFilter = aFilter; -} - - -NS_IMETHODIMP_(void) nsParser::GetCommand(nsString& aCommand) -{ - aCommand = mCommandStr; -} - -/** - * Call this method once you've created a parser, and want to instruct it - * about the command which caused the parser to be constructed. For example, - * this allows us to select a DTD which can do, say, view-source. - * - * @update gess 01/04/99 - * @param aCommand the command string to set - */ -NS_IMETHODIMP_(void) nsParser::SetCommand(const char* aCommand) -{ - nsCAutoString theCommand(aCommand); - if(theCommand.Equals(kViewSourceCommand)) - mCommand=eViewSource; - else if(theCommand.Equals(kViewFragmentCommand)) - mCommand=eViewFragment; - else mCommand=eViewNormal; - mCommandStr.AssignWithConversion(aCommand); -} - -/** - * Call this method once you've created a parser, and want to instruct it - * about the command which caused the parser to be constructed. For example, - * this allows us to select a DTD which can do, say, view-source. 
- * - * @update gess 01/04/99 - * @param aParserCommand the command to set - */ -NS_IMETHODIMP_(void) nsParser::SetCommand(eParserCommands aParserCommand) -{ - mCommand = aParserCommand; -} - - -/** - * Call this method once you've created a parser, and want to instruct it - * about what charset to load - * - * @update ftang 4/23/99 - * @param aCharset- the charset of a document - * @param aCharsetSource- the source of the charset - * @return nada - */ -NS_IMETHODIMP_(void) -nsParser::SetDocumentCharset(const nsACString& aCharset, PRInt32 aCharsetSource) -{ - mCharset = aCharset; - mCharsetSource = aCharsetSource; - if(mParserContext && mParserContext->mScanner) - mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource); -} - -void nsParser::SetSinkCharset(nsACString& aCharset) -{ - if (mSink) { - mSink->SetDocumentCharset(aCharset); - } -} - -/** - * This method gets called in order to set the content - * sink for this parser to dump nodes to. - * - * @update gess 01/04/99 - * @param nsIContentSink interface for node receiver - * @return - */ -NS_IMETHODIMP_(void) nsParser::SetContentSink(nsIContentSink* aSink) -{ - NS_PRECONDITION(aSink,"sink cannot be null!"); - mSink = aSink; - - if (mSink) { - mSink->SetParser(this); - } -} - -/** - * retrive the sink set into the parser - * @update gess5/11/98 - * @return current sink - */ -NS_IMETHODIMP_(nsIContentSink*) nsParser::GetContentSink(void) -{ - return mSink; -} - -/** - * Call this method when you want to - * register your dynamic DTD's with the parser. - * - * @update gess 01/04/99 - * @param aDTD is the object to be registered. - * @return nothing. 
- */ -NS_IMETHODIMP -nsParser::RegisterDTD(nsIDTD* aDTD) -{ - CSharedParserObjects* sharedObjects; - nsresult rv = GetSharedObjects(&sharedObjects); - NS_ENSURE_SUCCESS(rv, rv); - return sharedObjects->RegisterDTD(aDTD); -} - -/** - * Retrieve parsemode from topmost parser context - * - * @update gess 01/04/99 - * @return parsemode - */ -NS_IMETHODIMP_(nsDTDMode) nsParser::GetParseMode(void) -{ - if(mParserContext) - return mParserContext->mDTDMode; - NS_NOTREACHED("no parser context"); - return eDTDMode_unknown; -} - - -template <class CharT> -class CWordTokenizer { -public: - CWordTokenizer(const CharT* aBuffer,PRInt32 aStartOffset,PRInt32 aMaxOffset) { - mLength=0; - mOffset=aStartOffset; - mMaxOffset=aMaxOffset; - mBuffer=aBuffer; - mEndBuffer=mBuffer+mMaxOffset; - } - - //******************************************************************************** - // Get offset of nth word in string. - // We define words as: - // 1) sequence of alphanum; - // 2) quoted substring - // 3) SGML comment -- ... -- - // Returns offset of nth word, or -1 (if out of words). - //******************************************************************************** - - PRInt32 GetNextWord(PRBool aSkipQuotes=PR_FALSE) { - - if(mOffset == kNotFound) { - return kNotFound; // Ref. bug 89732 - } - - if (mOffset >= 0) { - const CharT *cp=mBuffer+mOffset+mLength; //skip last word - - mLength=0; //reset this - mOffset=-1; //reset this - - //now skip whitespace... - - CharT target=0; - PRBool done=PR_FALSE; - - while((!done) && (cp++<mEndBuffer)) { - switch(*cp) { - case kSpace: case kNewLine: - case kCR: case kTab: - case kEqual: - continue; - - case kQuote: - target=*cp; - if (aSkipQuotes) { - ++cp; - } - done=PR_TRUE; - break; - - case kMinus: - target=*cp; - done=PR_TRUE; - break; - - default: - done=PR_TRUE; - break; - } - } - - if(cp<mEndBuffer) { - - const CharT *firstcp=cp; //hang onto this... - PRInt32 theDashCount=2; - - ++cp; //just skip first letter to simplify processing... 
- - //ok, now find end of this word - while(cp++<mEndBuffer) { - if(kQuote==target) { - if(kQuote==*cp) { - ++cp; - break; //we found our end... - } - } - else if(kMinus==target) { - //then let's look for SGML comments - if(kMinus==*cp) { - if(4==++theDashCount) { - ++cp; - break; - } - } - } - else { - if((kSpace==*cp) || - (kNewLine==*cp) || - (kGreaterThan==*cp) || - (kQuote==*cp) || - (kCR==*cp) || - (kTab==*cp) || - (kEqual == *cp)) { - break; - } - } - } - - mLength=cp-firstcp; - mOffset = (0<mLength) ? firstcp-mBuffer : -1; - - } - } - - return mOffset; - } - - PRInt32 GetLength() const { - return mLength; - } - - PRInt32 mOffset; - PRInt32 mMaxOffset; - PRInt32 mLength; - const CharT* mBuffer; - const CharT* mEndBuffer; -}; - -/** - * Determine what DTD mode (and thus what layout nsCompatibility mode) - * to use for this document based on the first chunk of data recieved - * from the network (each parsercontext can have its own mode). (No, - * this is not an optimal solution -- we really don't need to know until - * after we've received the DOCTYPE, and this could easily be part of - * the regular parsing process if the parser were designed in a way that - * made such modifications easy.) - */ - -// Parse the PS production in the SGML spec (excluding the part dealing -// with entity references) starting at theIndex into theBuffer, and -// return the first index after the end of the production. 
-static PRInt32 ParsePS(const nsString& aBuffer, PRInt32 aIndex) -{ - for(;;) { - PRUnichar ch = aBuffer.CharAt(aIndex); - if ((ch == PRUnichar(' ')) || (ch == PRUnichar('\t')) || - (ch == PRUnichar('\n')) || (ch == PRUnichar('\r'))) { - ++aIndex; - } else if (ch == PRUnichar('-')) { - PRInt32 tmpIndex; - if (aBuffer.CharAt(aIndex+1) == PRUnichar('-') && - kNotFound != (tmpIndex=aBuffer.Find("--",PR_FALSE,aIndex+2,-1))) { - aIndex = tmpIndex + 2; - } else { - return aIndex; - } - } else { - return aIndex; - } - } -} - -#define PARSE_DTD_HAVE_DOCTYPE (1<<0) -#define PARSE_DTD_HAVE_PUBLIC_ID (1<<1) -#define PARSE_DTD_HAVE_SYSTEM_ID (1<<2) -#define PARSE_DTD_HAVE_INTERNAL_SUBSET (1<<3) - -// return PR_TRUE on success (includes not present), PR_FALSE on failure -static PRBool ParseDocTypeDecl(const nsString &aBuffer, - PRInt32 *aResultFlags, - nsString &aPublicID, - nsString &aSystemID) -{ - PRBool haveDoctype = PR_FALSE; - *aResultFlags = 0; - - // Skip through any comments and processing instructions - // The PI-skipping is a bit of a hack. 
- PRInt32 theIndex = 0; - do { - theIndex = aBuffer.FindChar('<', theIndex); - if (theIndex == kNotFound) break; - PRUnichar nextChar = aBuffer.CharAt(theIndex+1); - if (nextChar == PRUnichar('!')) { - PRInt32 tmpIndex = theIndex; - if (kNotFound != - (theIndex=aBuffer.Find("DOCTYPE", PR_TRUE, theIndex+2, 1))) { - haveDoctype = PR_TRUE; - theIndex += 7; // skip "DOCTYPE" - break; - } - theIndex = ParsePS(aBuffer,tmpIndex); - // -1, not 0, in case it's another markup declaration - theIndex = aBuffer.FindChar('>', theIndex); - } else if (nextChar == PRUnichar('?')) { - theIndex = aBuffer.FindChar('>', theIndex); - } else { - break; - } - } while (theIndex != kNotFound); - - if (!haveDoctype) - return PR_TRUE; - *aResultFlags |= PARSE_DTD_HAVE_DOCTYPE; - - theIndex = ParsePS(aBuffer, theIndex); - theIndex = aBuffer.Find("HTML", PR_TRUE, theIndex, 1); - if(kNotFound == theIndex) - return PR_FALSE; - theIndex = ParsePS(aBuffer, theIndex+4); - PRInt32 tmpIndex = aBuffer.Find("PUBLIC", PR_TRUE, theIndex, 1); - - if (kNotFound != tmpIndex) { - theIndex = ParsePS(aBuffer, tmpIndex+6); - - // We get here only if we've read <!DOCTYPE HTML PUBLIC - // (not case sensitive) possibly with comments within. - - // Now find the beginning and end of the public identifier - // and the system identifier (if present). - - PRUnichar lit = aBuffer.CharAt(theIndex); - if ((lit != PRUnichar('\"')) && (lit != PRUnichar('\''))) - return PR_FALSE; - - // Start is the first character, excluding the quote, and End is - // the final quote, so there are (end-start) characters. - - PRInt32 PublicIDStart = theIndex + 1; - PRInt32 PublicIDEnd = aBuffer.FindChar(lit, PublicIDStart); - if (kNotFound == PublicIDEnd) - return PR_FALSE; - theIndex = ParsePS(aBuffer, PublicIDEnd + 1); - PRUnichar next = aBuffer.CharAt(theIndex); - if (next == PRUnichar('>')) { - // There was a public identifier, but no system - // identifier, - // so do nothing. 
- // This is needed to avoid the else at the end, and it's - // also the most common case. - } else if ((next == PRUnichar('\"')) || - (next == PRUnichar('\''))) { - // We found a system identifier. - *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID; - PRInt32 SystemIDStart = theIndex + 1; - PRInt32 SystemIDEnd = aBuffer.FindChar(next, SystemIDStart); - if (kNotFound == SystemIDEnd) - return PR_FALSE; - aSystemID = - Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart); - } else if (next == PRUnichar('[')) { - // We found an internal subset. - *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET; - } else { - // Something's wrong. - return PR_FALSE; - } - - // Since a public ID is a minimum literal, we must trim - // and collapse whitespace - aPublicID = Substring(aBuffer, PublicIDStart, PublicIDEnd - PublicIDStart); - aPublicID.CompressWhitespace(PR_TRUE, PR_TRUE); - *aResultFlags |= PARSE_DTD_HAVE_PUBLIC_ID; - } else { - tmpIndex=aBuffer.Find("SYSTEM", PR_TRUE, theIndex, 1); - if (kNotFound != tmpIndex) { - // DOCTYPES with system ID but no Public ID - *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID; - - theIndex = ParsePS(aBuffer, tmpIndex+6); - PRUnichar next = aBuffer.CharAt(theIndex); - if (next != PRUnichar('\"') && next != PRUnichar('\'')) - return PR_FALSE; - - PRInt32 SystemIDStart = theIndex + 1; - PRInt32 SystemIDEnd = aBuffer.FindChar(next, SystemIDStart); - - if (kNotFound == SystemIDEnd) - return PR_FALSE; - aSystemID = - Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart); - theIndex = ParsePS(aBuffer, SystemIDEnd + 1); - } - - PRUnichar nextChar = aBuffer.CharAt(theIndex); - if (nextChar == PRUnichar('[')) - *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET; - else if (nextChar != PRUnichar('>')) - return PR_FALSE; - } - return PR_TRUE; -} - -struct PubIDInfo { - enum eMode { - eQuirks, /* always quirks mode, unless there's an internal subset */ - eQuirks3, /* ditto, but but pre-HTML4 (no tbody) */ - eAlmostStandards,/* 
eCompatibility_AlmostStandards */ - eFullStandards /* eCompatibility_FullStandards */ - /* - * public IDs that should trigger strict mode are not listed - * since we want all future public IDs to trigger strict mode as - * well - */ - }; - - const char* name; - eMode mode_if_no_sysid; - eMode mode_if_sysid; -}; - -#define ELEMENTS_OF(array_) (sizeof(array_)/sizeof(array_[0])) - -// These must be in nsCRT::strcmp order so binary-search can be used. -// This is verified, |#ifdef DEBUG|, below. - -// Even though public identifiers should be case sensitive, we will do -// all comparisons after converting to lower case in order to do -// case-insensitive comparison since there are a number of existing web -// sites that use the incorrect case. Therefore all of the public -// identifiers below are in lower case (with the correct case following, -// in comments). The case is verified, |#ifdef DEBUG|, below. -static const PubIDInfo kPublicIDs[] = { - {"+//silmaril//dtd html pro v0r11 19970101//en" /* "+//Silmaril//dtd html Pro v0r11 19970101//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" /* "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//as//dtd html 3.0 aswedit + extensions//en" /* "-//AS//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html 2.0 level 1//en" /* "-//IETF//DTD HTML 2.0 Level 1//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html 2.0 level 2//en" /* "-//IETF//DTD HTML 2.0 Level 2//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html 2.0 strict level 1//en" /* "-//IETF//DTD HTML 2.0 Strict Level 1//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html 2.0 strict level 2//en" /* "-//IETF//DTD HTML 2.0 Strict Level 2//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html 2.0 strict//en" /* "-//IETF//DTD 
HTML 2.0 Strict//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html 2.0//en" /* "-//IETF//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html 2.1e//en" /* "-//IETF//DTD HTML 2.1E//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html 3.0//en" /* "-//IETF//DTD HTML 3.0//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html 3.0//en//" /* "-//IETF//DTD HTML 3.0//EN//" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html 3.2 final//en" /* "-//IETF//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html 3.2//en" /* "-//IETF//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html 3//en" /* "-//IETF//DTD HTML 3//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html level 0//en" /* "-//IETF//DTD HTML Level 0//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html level 0//en//2.0" /* "-//IETF//DTD HTML Level 0//EN//2.0" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html level 1//en" /* "-//IETF//DTD HTML Level 1//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html level 1//en//2.0" /* "-//IETF//DTD HTML Level 1//EN//2.0" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html level 2//en" /* "-//IETF//DTD HTML Level 2//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html level 2//en//2.0" /* "-//IETF//DTD HTML Level 2//EN//2.0" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html level 3//en" /* "-//IETF//DTD HTML Level 3//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html level 3//en//3.0" /* "-//IETF//DTD HTML Level 3//EN//3.0" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html strict level 0//en" /* "-//IETF//DTD HTML Strict Level 0//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html strict level 0//en//2.0" 
/* "-//IETF//DTD HTML Strict Level 0//EN//2.0" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html strict level 1//en" /* "-//IETF//DTD HTML Strict Level 1//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html strict level 1//en//2.0" /* "-//IETF//DTD HTML Strict Level 1//EN//2.0" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html strict level 2//en" /* "-//IETF//DTD HTML Strict Level 2//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html strict level 2//en//2.0" /* "-//IETF//DTD HTML Strict Level 2//EN//2.0" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html strict level 3//en" /* "-//IETF//DTD HTML Strict Level 3//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html strict level 3//en//3.0" /* "-//IETF//DTD HTML Strict Level 3//EN//3.0" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html strict//en" /* "-//IETF//DTD HTML Strict//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html strict//en//2.0" /* "-//IETF//DTD HTML Strict//EN//2.0" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html strict//en//3.0" /* "-//IETF//DTD HTML Strict//EN//3.0" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html//en" /* "-//IETF//DTD HTML//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html//en//2.0" /* "-//IETF//DTD HTML//EN//2.0" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//ietf//dtd html//en//3.0" /* "-//IETF//DTD HTML//EN//3.0" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//metrius//dtd metrius presentational//en" /* "-//Metrius//DTD Metrius Presentational//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//microsoft//dtd internet explorer 2.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//microsoft//dtd internet explorer 2.0 html//en" /* "-//Microsoft//DTD Internet 
Explorer 2.0 HTML//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//microsoft//dtd internet explorer 2.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//microsoft//dtd internet explorer 3.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//microsoft//dtd internet explorer 3.0 html//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//microsoft//dtd internet explorer 3.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//netscape comm. corp.//dtd html//en" /* "-//Netscape Comm. Corp.//DTD HTML//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//netscape comm. corp.//dtd strict html//en" /* "-//Netscape Comm. Corp.//DTD Strict HTML//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//o'reilly and associates//dtd html 2.0//en" /* "-//O'Reilly and Associates//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//o'reilly and associates//dtd html extended 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//o'reilly and associates//dtd html extended relaxed 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//en" /* "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//en" /* "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//spyglass//dtd html 2.0 extended//en" /* "-//Spyglass//DTD HTML 2.0 Extended//EN" */, 
PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//sq//dtd html 2.0 hotmetal + extensions//en" /* "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//sun microsystems corp.//dtd hotjava html//en" /* "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//sun microsystems corp.//dtd hotjava strict html//en" /* "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//w3c//dtd html 3 1995-03-24//en" /* "-//W3C//DTD HTML 3 1995-03-24//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//w3c//dtd html 3.2 draft//en" /* "-//W3C//DTD HTML 3.2 Draft//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//w3c//dtd html 3.2 final//en" /* "-//W3C//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//w3c//dtd html 3.2//en" /* "-//W3C//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//w3c//dtd html 3.2s draft//en" /* "-//W3C//DTD HTML 3.2S Draft//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//w3c//dtd html 4.0 frameset//en" /* "-//W3C//DTD HTML 4.0 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//w3c//dtd html 4.0 transitional//en" /* "-//W3C//DTD HTML 4.0 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"-//w3c//dtd html 4.01 frameset//en" /* "-//W3C//DTD HTML 4.01 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards}, - {"-//w3c//dtd html 4.01 transitional//en" /* "-//W3C//DTD HTML 4.01 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards}, - {"-//w3c//dtd html experimental 19960712//en" /* "-//W3C//DTD HTML Experimental 19960712//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//w3c//dtd html experimental 970421//en" /* "-//W3C//DTD HTML Experimental 970421//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//w3c//dtd w3 html//en" /* "-//W3C//DTD W3 HTML//EN" */, PubIDInfo::eQuirks3, 
PubIDInfo::eQuirks3}, - {"-//w3c//dtd xhtml 1.0 frameset//en" /* "-//W3C//DTD XHTML 1.0 Frameset//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards}, - {"-//w3c//dtd xhtml 1.0 transitional//en" /* "-//W3C//DTD XHTML 1.0 Transitional//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards}, - {"-//w3o//dtd w3 html 3.0//en" /* "-//W3O//DTD W3 HTML 3.0//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//w3o//dtd w3 html 3.0//en//" /* "-//W3O//DTD W3 HTML 3.0//EN//" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//w3o//dtd w3 html strict 3.0//en//" /* "-//W3O//DTD W3 HTML Strict 3.0//EN//" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//webtechs//dtd mozilla html 2.0//en" /* "-//WebTechs//DTD Mozilla HTML 2.0//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-//webtechs//dtd mozilla html//en" /* "-//WebTechs//DTD Mozilla HTML//EN" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, - {"-/w3c/dtd html 4.0 transitional/en" /* "-/W3C/DTD HTML 4.0 Transitional/EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, - {"html" /* "HTML" */, PubIDInfo::eQuirks3, PubIDInfo::eQuirks3}, -}; - -#ifdef DEBUG -static void VerifyPublicIDs() -{ - static PRBool gVerified = PR_FALSE; - if (!gVerified) { - gVerified = PR_TRUE; - PRUint32 i; - for (i = 0; i < ELEMENTS_OF(kPublicIDs) - 1; ++i) { - if (nsCRT::strcmp(kPublicIDs[i].name, kPublicIDs[i+1].name) >= 0) { - NS_NOTREACHED("doctypes out of order"); - printf("Doctypes %s and %s out of order.\n", - kPublicIDs[i].name, kPublicIDs[i+1].name); - } - } - for (i = 0; i < ELEMENTS_OF(kPublicIDs); ++i) { - nsCAutoString lcPubID(kPublicIDs[i].name); - ToLowerCase(lcPubID); - if (nsCRT::strcmp(kPublicIDs[i].name, lcPubID.get()) != 0) { - NS_NOTREACHED("doctype not lower case"); - printf("Doctype %s not lower case.\n", kPublicIDs[i].name); - } - } - } -} -#endif - -static void DetermineHTMLParseMode(const nsString& aBuffer, - nsDTDMode& aParseMode, - eParserDocType& aDocType) -{ -#ifdef DEBUG - 
VerifyPublicIDs(); -#endif - PRInt32 resultFlags; - nsAutoString publicIDUCS2, sysIDUCS2; - if (ParseDocTypeDecl(aBuffer, &resultFlags, publicIDUCS2, sysIDUCS2)) { - if (!(resultFlags & PARSE_DTD_HAVE_DOCTYPE)) { - - // no DOCTYPE - aParseMode = eDTDMode_quirks; - aDocType = eHTML_Quirks; - - } else if ((resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) || - !(resultFlags & PARSE_DTD_HAVE_PUBLIC_ID)) { - - // A doctype with an internal subset is always full_standards. - // A doctype without a public ID is always full_standards. - aDocType = eHTML_Strict; - aParseMode = eDTDMode_full_standards; - - // Special hack for IBM's custom DOCTYPE. - if (!(resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) && - sysIDUCS2 == NS_LITERAL_STRING( - "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) { - aParseMode = eDTDMode_quirks; - aDocType = eHTML_Quirks; - } - - } else { - - // We have to check our list of public IDs to see what to do. - - // Yes, we want UCS2 to ASCII lossy conversion. - nsCAutoString publicID; - publicID.AssignWithConversion(publicIDUCS2); - - // See comment above definition of kPublicIDs about case - // sensitivity. - ToLowerCase(publicID); - - // binary search to see if we can find the correct public ID - // These must be signed since maximum can go below zero and we'll - // crash if it's unsigned. - PRInt32 minimum = 0; - PRInt32 maximum = ELEMENTS_OF(kPublicIDs) - 1; - PRInt32 index; - for (;;) { - index = (minimum + maximum) / 2; - PRInt32 comparison = - nsCRT::strcmp(publicID.get(), kPublicIDs[index].name); - if (comparison == 0) - break; - if (comparison < 0) - maximum = index - 1; - else - minimum = index + 1; - - if (maximum < minimum) { - // The DOCTYPE is not in our list, so it must be full_standards. - aParseMode = eDTDMode_full_standards; - aDocType = eHTML_Strict; - return; - } - } - - switch ((resultFlags & PARSE_DTD_HAVE_SYSTEM_ID) - ? 
kPublicIDs[index].mode_if_sysid - : kPublicIDs[index].mode_if_no_sysid) - { - case PubIDInfo::eQuirks3: - aParseMode = eDTDMode_quirks; - aDocType = eHTML3_Quirks; - break; - case PubIDInfo::eQuirks: - aParseMode = eDTDMode_quirks; - aDocType = eHTML_Quirks; - break; - case PubIDInfo::eAlmostStandards: - aParseMode = eDTDMode_almost_standards; - aDocType = eHTML_Strict; - break; - case PubIDInfo::eFullStandards: - aParseMode = eDTDMode_full_standards; - aDocType = eHTML_Strict; - break; - default: - NS_NOTREACHED("no other cases!"); - } - - } - } else { - // badly formed DOCTYPE -> quirks - aParseMode = eDTDMode_quirks; - aDocType = eHTML3_Quirks; - } -} - -static -void DetermineParseMode(const nsString& aBuffer, - nsDTDMode& aParseMode, - eParserDocType& aDocType, - const nsACString& aMimeType) -{ - if (aMimeType.Equals(NS_LITERAL_CSTRING(kHTMLTextContentType))) { - DetermineHTMLParseMode(aBuffer, aParseMode, aDocType); - } else if (aMimeType.Equals(NS_LITERAL_CSTRING(kPlainTextContentType)) || - aMimeType.Equals(NS_LITERAL_CSTRING(kTextCSSContentType)) || - aMimeType.Equals(NS_LITERAL_CSTRING(kApplicationJSContentType)) || - aMimeType.Equals(NS_LITERAL_CSTRING(kTextJSContentType))) { - aDocType = ePlainText; - aParseMode = eDTDMode_quirks; - } else { // Some form of XML - aDocType = eXML; - aParseMode = eDTDMode_full_standards; - } -} - -/** - * - * - * @update gess 5/13/98 - * @param - * @return - */ -static -nsresult -FindSuitableDTD(CParserContext& aParserContext, - const nsString& aBuffer, - PRBool* aReturn) -{ - *aReturn = PR_FALSE; - //Let's start by trying the defaultDTD, if one exists... 
- if(aParserContext.mDTD) - if(aParserContext.mDTD->CanParse(aParserContext,aBuffer,0)) - return PR_TRUE; - - CSharedParserObjects* sharedObjects; - nsresult rv = GetSharedObjects(&sharedObjects); - NS_ENSURE_SUCCESS(rv, rv); - - aParserContext.mAutoDetectStatus = eUnknownDetect; - PRInt32 theDTDIndex = 0; - nsIDTD* theBestDTD = 0; - nsIDTD* theDTD = 0; - PRBool thePrimaryFound = PR_FALSE; - - while ((theDTDIndex <= sharedObjects->mDTDDeque.GetSize()) && - (aParserContext.mAutoDetectStatus != ePrimaryDetect)){ - theDTD = NS_STATIC_CAST(nsIDTD*, sharedObjects->mDTDDeque.ObjectAt(theDTDIndex++)); - if (theDTD) { - // Store detect status in temp ( theResult ) to avoid bugs such as - // 36233, 36754, 36491, 36323. Basically, we should avoid calling DTD's - // WillBuildModel() multiple times, i.e., we shouldn't leave auto-detect-status - // unknown. - eAutoDetectResult theResult = theDTD->CanParse(aParserContext,aBuffer,0); - if (eValidDetect == theResult){ - aParserContext.mAutoDetectStatus = eValidDetect; - theBestDTD = theDTD; - } - else if (ePrimaryDetect == theResult) { - theBestDTD = theDTD; - thePrimaryFound = PR_TRUE; - aParserContext.mAutoDetectStatus = ePrimaryDetect; - } - } - if (theDTDIndex == sharedObjects->mDTDDeque.GetSize() && !thePrimaryFound) { - if (!sharedObjects->mHasXMLDTD) { - rv = NS_NewExpatDriver(&theDTD); //do this to view XML files... - NS_ENSURE_SUCCESS(rv, rv); - - sharedObjects->mDTDDeque.Push(theDTD); - sharedObjects->mHasXMLDTD = PR_TRUE; - } -#ifdef MOZ_VIEW_SOURCE - else if (!sharedObjects->mHasViewSourceDTD) { - rv = NS_NewViewSourceHTML(&theDTD); //do this so all non-html files can be viewed... 
- NS_ENSURE_SUCCESS(rv, rv); - - sharedObjects->mDTDDeque.Push(theDTD); - sharedObjects->mHasViewSourceDTD = PR_TRUE; - } -#endif - } - } - - if(theBestDTD) { - rv = theBestDTD->CreateNewInstance(&aParserContext.mDTD); - NS_ENSURE_SUCCESS(rv, rv); - - *aReturn = PR_TRUE; - } - - return rv; -} - -NS_IMETHODIMP -nsParser::CancelParsingEvents() -{ - if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) { - NS_ASSERTION(mEventQueue,"Event queue is null"); - // Revoke all pending continue parsing events - if (mEventQueue != nsnull) { - mEventQueue->RevokeEvents(this); - } - - mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; - } - return NS_OK; -} - -//////////////////////////////////////////////////////////////////////// - - -/** - * This gets called just prior to the model actually - * being constructed. It's important to make this the - * last thing that happens right before parsing, so we - * can delay until the last moment the resolution of - * which DTD to use (unless of course we're assigned one). - */ -nsresult -nsParser::WillBuildModel(nsString& aFilename) -{ - if (!mParserContext) - return kInvalidParserContext; - - if (eUnknownDetect != mParserContext->mAutoDetectStatus) - return NS_OK; - - nsAutoString theBuffer; - // XXXVidur Make a copy and only check in the first 1k - mParserContext->mScanner->Peek(theBuffer, 1024); - - if (eDTDMode_unknown == mParserContext->mDTDMode || - eDTDMode_autodetect == mParserContext->mDTDMode) - DetermineParseMode(theBuffer, mParserContext->mDTDMode, - mParserContext->mDocType, mParserContext->mMimeType); - PRBool found; - nsresult rv = FindSuitableDTD(*mParserContext,theBuffer, &found); - NS_ENSURE_SUCCESS(rv, rv); - - if (!found) - return rv; - - nsITokenizer* tokenizer; - mParserContext->GetTokenizer(mParserContext->mDTD->GetType(), tokenizer); - return mParserContext->mDTD->WillBuildModel(*mParserContext, tokenizer, mSink); -} - -/** - * This gets called when the parser is done with its input. 
- * Note that the parser may have been called recursively, so we - * have to check for a prev. context before closing out the DTD/sink. - * @update gess5/18/98 - * @param - * @return error code -- 0 if ok, non-zero if error. - */ -nsresult nsParser::DidBuildModel(nsresult anErrorCode) { - //One last thing...close any open containers. - nsresult result=anErrorCode; - - if (IsComplete()) { - if (mParserContext && !mParserContext->mPrevContext) { - if (mParserContext->mDTD) { - result = mParserContext->mDTD->DidBuildModel(anErrorCode,PR_TRUE,this,mSink); - } - //Ref. to bug 61462. - mParserContext->mRequest = 0; - }//if - } - - return result; -} - - -/** - * This method adds a new parser context to the list, - * pushing the current one to the next position. - * @update gess7/22/98 - * @param ptr to new context - * @return nada - */ -void nsParser::PushContext(CParserContext& aContext) { - aContext.mPrevContext=mParserContext; - mParserContext=&aContext; -} - -/** - * This method pops the topmost context off the stack, - * returning it to the user. The next context (if any) - * becomes the current context. - * @update gess7/22/98 - * @return prev. context - */ -CParserContext* nsParser::PopContext() -{ - CParserContext* oldContext = mParserContext; - if (oldContext) { - mParserContext = oldContext->mPrevContext; - if (mParserContext) { - // If the old context was blocked, propagate the blocked state - // back to the new one. Also, propagate the stream listener state - // but don't override onStop state to guarantee the call to DidBuildModel(). - if (mParserContext->mStreamListenerState != eOnStop) { - mParserContext->mStreamListenerState = oldContext->mStreamListenerState; - } - // Preserve tokenizer state so that information is not lost - // between document.write. 
This fixes bug 99467 - if (mParserContext->mTokenizer) - mParserContext->mTokenizer->CopyState(oldContext->mTokenizer); - } - } - return oldContext; -} - -/** - * Call this when you want control whether or not the parser will parse - * and tokenize input (TRUE), or whether it just caches input to be - * parsed later (FALSE). - * - * @update gess 1/29/99 - * @param aState determines whether we parse/tokenize or just cache. - * @return current state - */ -void nsParser::SetUnusedInput(nsString& aBuffer) -{ - mUnusedInput=aBuffer; -} - -/** - * Call this when you want to *force* the parser to terminate the - * parsing process altogether. This is binary -- so once you terminate - * you can't resume without restarting altogether. - * - * @update gess 7/4/99 - * @return should return NS_OK once implemented - */ -NS_IMETHODIMP nsParser::Terminate(void) -{ - nsresult result = NS_OK; - if (mParserContext && mParserContext->mDTD) { - mParserContext->mDTD->Terminate(); - // XXX - [ until we figure out a way to break parser-sink circularity ] - // Hack - Hold a reference until we are completely done... - nsCOMPtr<nsIParser> kungFuDeathGrip(this); - mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING; - - // CancelParsingEvents must be called to avoid leaking the nsParser object - // @see bug 108049 - // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then CancelParsingEvents - // will reset it so DidBuildModel will call DidBuildModel on the DTD. Note: - // The IsComplete() call inside of DidBuildModel looks at the pendingContinueEvents flag. - CancelParsingEvents(); - DidBuildModel(result); - } - return result; -} - - -/** - * - * @update gess 1/29/99 - * @param aState determines whether we parse/tokenize or just cache. - * @return current state - */ -NS_IMETHODIMP nsParser::ContinueParsing() -{ - // If the stream has already finished, there's a good chance - // that we might start closing things down when the parser - // is reenabled. 
To make sure that we're not deleted across - // the reenabling process, hold a reference to ourselves. - nsresult result=NS_OK; - nsCOMPtr<nsIParser> kungFuDeathGrip(this); - - mFlags |= NS_PARSER_FLAG_PARSER_ENABLED; - - PRBool isFinalChunk=(mParserContext && mParserContext->mStreamListenerState==eOnStop)? PR_TRUE:PR_FALSE; - - result=ResumeParse(PR_TRUE,isFinalChunk); // Ref. bug 57999 - - if(result!=NS_OK) - result=mInternalState; - - return result; -} - -/** - * Stops parsing temporarily. That's it will prevent the - * parser from building up content model. - * - * @update - * @return - */ -NS_IMETHODIMP_(void) nsParser::BlockParser() -{ - mFlags &= ~NS_PARSER_FLAG_PARSER_ENABLED; - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: nsParser::BlockParser(), this=%p\n", this)); - MOZ_TIMER_STOP(mParseTime); -} - -/** - * Open up the parser for tokenization, building up content - * model..etc. However, this method does not resume parsing - * automatically. It's the callers' responsibility to restart - * the parsing engine. - * - * @update - * @return - */ -NS_IMETHODIMP_(void) nsParser::UnblockParser() -{ - mFlags |= NS_PARSER_FLAG_PARSER_ENABLED; - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: nsParser::UnblockParser(), this=%p\n", this)); - MOZ_TIMER_START(mParseTime); -} - -/** - * Call this to query whether the parser is enabled or not. - * - * @update vidur 4/12/99 - * @return current state - */ -NS_IMETHODIMP_(PRBool) nsParser::IsParserEnabled() -{ - return mFlags & NS_PARSER_FLAG_PARSER_ENABLED; -} - -/** - * Call this to query whether the parser thinks it's done with parsing. 
- * - * @update rickg 5/12/01 - * @return complete state - */ -NS_IMETHODIMP_(PRBool) nsParser::IsComplete() -{ - return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT); -} - - -void nsParser::HandleParserContinueEvent() { - mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; - ContinueParsing(); -} - -PRBool nsParser::CanInterrupt(void) { - return mFlags & NS_PARSER_FLAG_CAN_INTERRUPT; -} - -void nsParser::SetCanInterrupt(PRBool aCanInterrupt) { - if (aCanInterrupt) { - mFlags |= NS_PARSER_FLAG_CAN_INTERRUPT; - } - else { - mFlags &= ~NS_PARSER_FLAG_CAN_INTERRUPT; - } -} - -/** - * This is the main controlling routine in the parsing process. - * Note that it may get called multiple times for the same scanner, - * since this is a pushed based system, and all the tokens may - * not have been consumed by the scanner during a given invocation - * of this method. - * - * @update gess 01/04/99 - * @param aFilename -- const char* containing file to be parsed. - * @return error code -- 0 if ok, non-zero if error. 
- */ -NS_IMETHODIMP -nsParser::Parse(nsIURI* aURL, - nsIRequestObserver* aListener, - PRBool aVerifyEnabled, - void* aKey, - nsDTDMode aMode) -{ - - NS_PRECONDITION(aURL, "Error: Null URL given"); - - nsresult result=kBadURL; - mObserver = aListener; - - if (aVerifyEnabled) { - mFlags |= NS_PARSER_FLAG_DTD_VERIFICATION; - } - else { - mFlags &= ~NS_PARSER_FLAG_DTD_VERIFICATION; - } - - if(aURL) { - nsCAutoString spec; - nsresult rv = aURL->GetSpec(spec); - if (rv != NS_OK) { - return rv; - } - NS_ConvertUTF8toUCS2 theName(spec); - - nsScanner* theScanner=new nsScanner(theName,PR_FALSE,mCharset,mCharsetSource); - CParserContext* pc=new CParserContext(theScanner,aKey,mCommand,aListener); - if(pc && theScanner) { - pc->mMultipart=PR_TRUE; - pc->mContextType=CParserContext::eCTURL; - pc->mDTDMode=aMode; - PushContext(*pc); - result=NS_OK; - } - else{ - result=mInternalState=NS_ERROR_HTMLPARSER_BADCONTEXT; - } - } - return result; -} - - -/** - * Cause parser to parse input from given stream - * @update vidur 12/11/98 - * @param aStream is the i/o source - * @return error code -- 0 if ok, non-zero if error. 
- */ -NS_IMETHODIMP -nsParser::Parse(nsIInputStream* aStream, - const nsACString& aMimeType, - PRBool aVerifyEnabled, - void* aKey, - nsDTDMode aMode) -{ - if (aVerifyEnabled) { - mFlags |= NS_PARSER_FLAG_DTD_VERIFICATION; - } - else { - mFlags &= ~NS_PARSER_FLAG_DTD_VERIFICATION; - } - - nsresult result=NS_ERROR_OUT_OF_MEMORY; - - //ok, time to create our tokenizer and begin the process - nsAutoString theUnknownFilename(NS_LITERAL_STRING("unknown")); - - // references - nsScanner* theScanner=new nsScanner(theUnknownFilename,aStream,mCharset,mCharsetSource); - - CParserContext* pc=new CParserContext(theScanner,aKey,mCommand,0); - if(pc && theScanner) { - PushContext(*pc); - pc->SetMimeType(aMimeType); - pc->mStreamListenerState=eOnStart; - pc->mMultipart=PR_FALSE; - pc->mContextType=CParserContext::eCTStream; - pc->mDTDMode=aMode; - mParserContext->mScanner->Eof(); - result=ResumeParse(); - pc=PopContext(); - delete pc; - } - else{ - result=mInternalState=NS_ERROR_HTMLPARSER_BADCONTEXT; - } - return result; -} - - -/** - * Call this method if all you want to do is parse 1 string full of HTML text. - * In particular, this method should be called by the DOM when it has an HTML - * string to feed to the parser in real-time. - * - * @update gess5/11/98 - * @param aSourceBuffer contains a string-full of real content - * @param aMimeType tells us what type of content to expect in the given string - * @return error code -- 0 if ok, non-zero if error. - */ -NS_IMETHODIMP -nsParser::Parse(const nsAString& aSourceBuffer, - void* aKey, - const nsACString& aMimeType, - PRBool aVerifyEnabled, - PRBool aLastCall, - nsDTDMode aMode) -{ - - //NOTE: Make sure that updates to this method don't cause - // bug #2361 to break again! - - nsresult result=NS_OK; - - if(aLastCall && aSourceBuffer.IsEmpty()) { - // Nothing is being passed to the parser so return - // immediately. mUnusedInput will get processed when - // some data is actually passed in. 
- return result; - } - - // hack to pass on to the dtd the caller's desire to - // parse a fragment without worrying about containment rules - if (aMode == eDTDMode_fragment) - mCommand = eViewFragment; - - // Maintain a reference to ourselves so we don't go away - // till we're completely done. - nsCOMPtr<nsIParser> kungFuDeathGrip(this); - - if(!aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) { - - if (aVerifyEnabled) { - mFlags |= NS_PARSER_FLAG_DTD_VERIFICATION; - } - else { - mFlags &= ~NS_PARSER_FLAG_DTD_VERIFICATION; - } - - CParserContext* pc=0; - - if((!mParserContext) || (mParserContext->mKey!=aKey)) { - //only make a new context if we dont have one, OR if we do, but has a different context key... - - nsScanner* theScanner = new nsScanner(mUnusedInput,mCharset,mCharsetSource); - NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY); - - nsIDTD *theDTD = 0; - eAutoDetectResult theStatus = eUnknownDetect; - - if (mParserContext && mParserContext->mMimeType==aMimeType) { - NS_ASSERTION(mParserContext->mDTD,"How come the DTD is null?"); // Ref. Bug 90379 - - if (mParserContext) { - // To fix bug 32263 we used create a new instance of the DTD!. - // All we need is a new tokenizer which now gets created with - // a parser context. - theDTD = mParserContext->mDTD; - theStatus=mParserContext->mAutoDetectStatus; - //added this to fix bug 32022. 
- } - } - - pc = new CParserContext(theScanner, aKey, mCommand, 0, theDTD, theStatus, aLastCall); - NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY); - - PushContext(*pc); - - pc->mMultipart=!aLastCall; //by default - if (pc->mPrevContext) { - pc->mMultipart |= pc->mPrevContext->mMultipart; //if available - } - - // start fix bug 40143 - if(pc->mMultipart) { - pc->mStreamListenerState=eOnDataAvail; - if(pc->mScanner) pc->mScanner->SetIncremental(PR_TRUE); - } - else { - pc->mStreamListenerState=eOnStop; - if(pc->mScanner) pc->mScanner->SetIncremental(PR_FALSE); - } - // end fix for 40143 - - pc->mContextType=CParserContext::eCTString; - pc->SetMimeType(aMimeType); - pc->mDTDMode=aMode; - mUnusedInput.Truncate(0); - - //printf("Parse(string) iterate: %i",PR_FALSE); - pc->mScanner->Append(aSourceBuffer); - // Do not interrupt document.write() - bug 95487 - result = ResumeParse(PR_FALSE, PR_FALSE, PR_FALSE); - } - else { - mParserContext->mScanner->Append(aSourceBuffer); - if(!mParserContext->mPrevContext) { - // Set stream listener state to eOnStop, on the final context - Fix 68160, - // to guarantee DidBuildModel() call - Fix 36148 - if(aLastCall) { - mParserContext->mStreamListenerState=eOnStop; - } - ResumeParse(PR_FALSE, PR_FALSE, PR_FALSE); - } - } - }//if - - return result; -} - -/** - * - * @update gess 04/01/99 - * @param - * @return - */ -NS_IMETHODIMP -nsParser::ParseFragment(const nsAString& aSourceBuffer, - void* aKey, - nsVoidArray& aTagStack, - PRUint32 anInsertPos, - const nsACString& aMimeType, - nsDTDMode aMode) -{ - nsresult result = NS_OK; - nsAutoString theContext; - PRUint32 theCount = aTagStack.Count(); - PRUint32 theIndex = 0; - - while (theIndex++ < theCount){ - theContext.Append(NS_LITERAL_STRING("<")); - theContext.Append((PRUnichar*)aTagStack.ElementAt(theCount - theIndex)); - theContext.Append(NS_LITERAL_STRING(">")); - } - - theContext.Append(NS_LITERAL_STRING("<endnote>")); //XXXHack! I'll make this better later. 
- - //now it's time to try to build the model from this fragment - - mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED; //disable observers for fragments - result = Parse(theContext + aSourceBuffer,(void*)&theContext,aMimeType,PR_FALSE,PR_TRUE, aMode); - mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED; //now reenable. - - return result; -} - - -/** - * This routine is called to cause the parser to continue parsing it's underlying stream. - * This call allows the parse process to happen in chunks, such as when the content is push - * based, and we need to parse in pieces. - * - * An interesting change in how the parser gets used has led us to add extra processing to this method. - * The case occurs when the parser is blocked in one context, and gets a parse(string) call in another context. - * In this case, the parserContexts are linked. No problem. - * - * The problem is that Parse(string) assumes that it can proceed unabated, but if the parser is already - * blocked that assumption is false. So we needed to add a mechanism here to allow the parser to continue - * to process (the pop and free) contexts until 1) it get's blocked again; 2) it runs out of contexts. - * - * - * @update rickg 03.10.2000 - * @param allowItertion : set to true if non-script resumption is requested - * @param aIsFinalChunk : tells us when the last chunk of data is provided. - * @return error code -- 0 if ok, non-zero if error. 
- */ -nsresult nsParser::ResumeParse(PRBool allowIteration, PRBool aIsFinalChunk, PRBool aCanInterrupt) { - - //printf(" Resume %i, prev-context: %p\n",allowIteration,mParserContext->mPrevContext); - - - nsresult result=NS_OK; - - if((mFlags & NS_PARSER_FLAG_PARSER_ENABLED) && - mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) { - - - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: nsParser::ResumeParse(), this=%p\n", this)); - MOZ_TIMER_START(mParseTime); - - result = WillBuildModel(mParserContext->mScanner->GetFilename()); - if (NS_FAILED(result)) { - mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE; - return result; - } - - if(mParserContext->mDTD) { - - mParserContext->mDTD->WillResumeParse(mSink); - PRBool theFirstTime=PR_TRUE; - PRBool theIterationIsOk=(theFirstTime || allowIteration||(!mParserContext->mPrevContext)); - - while((result==NS_OK) && (theIterationIsOk)) { - theFirstTime=PR_FALSE; - if(!mUnusedInput.IsEmpty()) { - if(mParserContext->mScanner) { - // -- Ref: Bug# 22485 -- - // Insert the unused input into the source buffer - // as if it was read from the input stream. - // Adding UngetReadable() per vidur!! - mParserContext->mScanner->UngetReadable(mUnusedInput); - mUnusedInput.Truncate(0); - } - } - - //Only allow parsing to be interrupted in the subsequent call - //to build model. - SetCanInterrupt(aCanInterrupt); - nsresult theTokenizerResult = mFlags & NS_PARSER_FLAG_CAN_TOKENIZE ? Tokenize(aIsFinalChunk) : NS_OK; // kEOF==2152596456 - result=BuildModel(); - - if(result==NS_ERROR_HTMLPARSER_INTERRUPTED) { - if(aIsFinalChunk) - PostContinueEvent(); - } - SetCanInterrupt(PR_FALSE); - - theIterationIsOk=PRBool((kEOF!=theTokenizerResult) && (result!=NS_ERROR_HTMLPARSER_INTERRUPTED)); - - // Make sure not to stop parsing too early. Therefore, before shutting down the - // parser, it's important to check whether the input buffer has been scanned to - // completion ( theTokenizerResult should be kEOF ). kEOF -> End of buffer. 
- - // If we're told to block the parser, we disable all further parsing - // (and cache any data coming in) until the parser is re-enabled. - - if(NS_ERROR_HTMLPARSER_BLOCK==result) { - //BLOCK == 2152596464 - if (mParserContext->mDTD) { - mParserContext->mDTD->WillInterruptParse(mSink); - } - - BlockParser(); - return NS_OK; - } - - else if (NS_ERROR_HTMLPARSER_STOPPARSING==result) { - // Note: Parser Terminate() calls DidBuildModel. - if(mInternalState!=NS_ERROR_HTMLPARSER_STOPPARSING) { - DidBuildModel(mStreamStatus); - mInternalState = result; - } - return NS_OK; - } - - else if(((NS_OK==result) && (theTokenizerResult==kEOF)) || (result==NS_ERROR_HTMLPARSER_INTERRUPTED)){ - - PRBool theContextIsStringBased=PRBool(CParserContext::eCTString==mParserContext->mContextType); - if( (eOnStop==mParserContext->mStreamListenerState) || - (!mParserContext->mMultipart) || theContextIsStringBased) { - - if(!mParserContext->mPrevContext) { - if(eOnStop==mParserContext->mStreamListenerState) { - - DidBuildModel(mStreamStatus); - - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: nsParser::ResumeParse(), this=%p\n", this)); - MOZ_TIMER_STOP(mParseTime); - - MOZ_TIMER_LOG(("Parse Time (this=%p): ", this)); - MOZ_TIMER_PRINT(mParseTime); - - MOZ_TIMER_LOG(("DTD Time: ")); - MOZ_TIMER_PRINT(mDTDTime); - - MOZ_TIMER_LOG(("Tokenize Time: ")); - MOZ_TIMER_PRINT(mTokenizeTime); - - return NS_OK; - } - - } - else { - - CParserContext* theContext=PopContext(); - if(theContext) { - theIterationIsOk=PRBool(allowIteration && theContextIsStringBased); - if(theContext->mCopyUnused) { - theContext->mScanner->CopyUnusedData(mUnusedInput); - } - delete theContext; - } - result = mInternalState; - aIsFinalChunk=(mParserContext && mParserContext->mStreamListenerState==eOnStop)? PR_TRUE:PR_FALSE; - - //...then intentionally fall through to WillInterruptParse()... 
- } - - } - - } - - if((kEOF==theTokenizerResult) || (result==NS_ERROR_HTMLPARSER_INTERRUPTED)) { - result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result; - if (mParserContext->mDTD) { - mParserContext->mDTD->WillInterruptParse(mSink); - } - } - - - }//while - }//if - else { - mInternalState=result=NS_ERROR_HTMLPARSER_UNRESOLVEDDTD; - } - }//if - - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: nsParser::ResumeParse(), this=%p\n", this)); - MOZ_TIMER_STOP(mParseTime); - - return (result==NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result; -} - -/** - * This is where we loop over the tokens created in the - * tokenization phase, and try to make sense out of them. - * - * @update gess 01/04/99 - * @param - * @return error code -- 0 if ok, non-zero if error. - */ -nsresult nsParser::BuildModel() { - CParserContext* theRootContext = mParserContext; - nsITokenizer* theTokenizer = 0; - - nsresult result = NS_OK; - if (mParserContext) { - PRInt32 type = mParserContext->mDTD ? mParserContext->mDTD->GetType() : NS_IPARSER_FLAG_HTML; - mParserContext->GetTokenizer(type, theTokenizer); - } - - if (theTokenizer) { - - //Get the root DTD for use in model building... - while (theRootContext->mPrevContext) { - theRootContext = theRootContext->mPrevContext; - } - - nsIDTD* theRootDTD = theRootContext->mDTD; - if (theRootDTD) { - MOZ_TIMER_START(mDTDTime); - - result = theRootDTD->BuildModel(this, theTokenizer, nsnull, mSink); - - MOZ_TIMER_STOP(mDTDTime); - } - } - else{ - mInternalState = result = NS_ERROR_HTMLPARSER_BADTOKENIZER; - } - return result; -} - - -/** - * - * @update gess1/22/99 - * @param - * @return - */ -nsresult nsParser::GetTokenizer(nsITokenizer*& aTokenizer) { - nsresult result = NS_OK; - aTokenizer = nsnull; - if(mParserContext) { - PRInt32 type = mParserContext->mDTD ? 
mParserContext->mDTD->GetType() : NS_IPARSER_FLAG_HTML; - result = mParserContext->GetTokenizer(type, aTokenizer); - } - return result; -} - -/******************************************************************* - These methods are used to talk to the netlib system... - *******************************************************************/ - -#ifdef rickgdebug -#include <fstream.h> - fstream* gOutFile; -#endif - -/** - * - * - * @update gess 5/12/98 - * @param - * @return error code -- 0 if ok, non-zero if error. - */ -nsresult nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext) { - - NS_PRECONDITION(eNone==mParserContext->mStreamListenerState, - "Parser's nsIStreamListener API was not setup " - "correctly in constructor."); - - if (mObserver) { - mObserver->OnStartRequest(request, aContext); - } - mParserContext->mStreamListenerState = eOnStart; - mParserContext->mAutoDetectStatus = eUnknownDetect; - mParserContext->mDTD = 0; - mParserContext->mRequest = request; - - nsresult rv; - nsCAutoString contentType; - nsCOMPtr<nsIChannel> channel = do_QueryInterface(request); - NS_ASSERTION(channel, "parser needs a channel to find a dtd"); - - rv = channel->GetContentType(contentType); - if (NS_SUCCEEDED(rv)) - { - mParserContext->SetMimeType(contentType); - } - -#ifdef rickgdebug - gOutFile= new fstream("c:/temp/out.file",ios::trunc); -#endif - - return NS_OK; -} - - -#define UTF16_BE "UTF-16BE" -#define UTF16_LE "UTF-16LE" -#define UCS4_BE "UTF-32BE" -#define UCS4_LE "UTF-32LE" -#define UCS4_2143 "X-ISO-10646-UCS-4-2143" -#define UCS4_3412 "X-ISO-10646-UCS-4-3412" -#define UTF8 "UTF-8" - -static inline PRBool IsSecondMarker(unsigned char aChar) -{ - switch (aChar) { - case '!': - case '?': - case 'h': - case 'H': - return PR_TRUE; - default: - return PR_FALSE; - } -} - -static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsCString& oCharset, PRInt32& oCharsetSource) { - oCharsetSource= kCharsetFromAutoDetection; - 
oCharset.Truncate(); - // See http://www.w3.org/TR/2000/REC-xml-20001006#sec-guessing - // for details - // Also, MS Win2K notepad now generate 3 bytes BOM in UTF8 as UTF8 signature - // We need to check that - // UCS2 BOM FEFF = UTF8 EF BB BF - switch(aBytes[0]) - { - case 0x00: - if(0x00==aBytes[1]) { - // 00 00 - if((0xFE==aBytes[2]) && (0xFF==aBytes[3])) { - // 00 00 FE FF UCS-4, big-endian machine (1234 order) - oCharset.Assign(UCS4_BE); - } else if((0x00==aBytes[2]) && (0x3C==aBytes[3])) { - // 00 00 00 3C UCS-4, big-endian machine (1234 order) - oCharset.Assign(UCS4_BE); - } else if((0xFF==aBytes[2]) && (0xFE==aBytes[3])) { - // 00 00 FF FE UCS-4, unusual octet order (2143) - oCharset.Assign(UCS4_2143); - } else if((0x3C==aBytes[2]) && (0x00==aBytes[3])) { - // 00 00 3C 00 UCS-4, unusual octet order (2143) - oCharset.Assign(UCS4_2143); - } - oCharsetSource = kCharsetFromByteOrderMark; - } else if((0x3C==aBytes[1]) && (0x00==aBytes[2])) { - // 00 3C 00 - if(IsSecondMarker(aBytes[3])) { - // 00 3C 00 SM UTF-16, big-endian, no Byte Order Mark - oCharset.Assign(UTF16_BE); - } else if((0x00==aBytes[3])) { - // 00 3C 00 00 UCS-4, unusual octet order (3412) - oCharset.Assign(UCS4_3412); - } - oCharsetSource = kCharsetFromByteOrderMark; - } - break; - case 0x3C: - if(0x00==aBytes[1] && (0x00==aBytes[3])) { - // 3C 00 XX 00 - if(IsSecondMarker(aBytes[2])) { - // 3C 00 SM 00 UTF-16, little-endian, no Byte Order Mark - oCharset.Assign(UTF16_LE); - } else if((0x00==aBytes[2])) { - // 3C 00 00 00 UCS-4, little-endian machine (4321 order) - oCharset.Assign(UCS4_LE); - } - oCharsetSource = kCharsetFromByteOrderMark; - // For html, meta tag detector is invoked before this so that we have - // to deal only with XML here. 
- } else if( (0x3F==aBytes[1]) && - (0x78==aBytes[2]) && (0x6D==aBytes[3]) && - (0 == PL_strncmp("<?xml", (char*)aBytes, 5 ))) { - // 3C 3F 78 6D - // ASCII characters are in their normal positions, so we can safely - // deal with the XML declaration in the old C way - // XXX This part could be made simpler by using CWordTokenizer<char>, - // but bug 104479 must be fixed first. - // The shortest string so far (strlen==5): - // <?xml - PRInt32 i; - PRBool versionFound = PR_FALSE, encodingFound = PR_FALSE; - for (i=6; i < aLen && !encodingFound; ++i) { - // end of XML declaration? - if ((((char*)aBytes)[i] == '?') && - ((i+1) < aLen) && - (((char*)aBytes)[i+1] == '>')) { - break; - } - // Version is required. - if (!versionFound) { - // Want to avoid string comparisons, hence looking for 'n' - // and only if found check the string leading to it. Not - // foolproof, but fast. - // The shortest string allowed before this is (strlen==13): - // <?xml version - if ((((char*)aBytes)[i] == 'n') && - (i >= 12) && - (0 == PL_strncmp("versio", (char*)(aBytes+i-6), 6 ))) { - // Fast forward through version - char q = 0; - for (++i; i < aLen; ++i) { - char qi = ((char*)aBytes)[i]; - if (qi == '\'' || qi == '"') { - if (q && q == qi) { - // ending quote - versionFound = PR_TRUE; - break; - } else { - // Starting quote - q = qi; - } - } - } - } - } else { - // encoding must follow version - // Want to avoid string comparisons, hence looking for 'g' - // and only if found check the string leading to it. Not - // foolproof, but fast. 
- // The shortest allowed string before this (strlen==26): - // <?xml version="1" encoding - if ((((char*)aBytes)[i] == 'g') && - (i >= 25) && - (0 == PL_strncmp("encodin", (char*)(aBytes+i-7), 7 ))) { - PRInt32 encStart = 0; - char q = 0; - for (++i; i < aLen; ++i) { - char qi = ((char*)aBytes)[i]; - if (qi == '\'' || qi == '"') { - if (q && q == qi) { - PRInt32 count = i - encStart; - // encoding value is invalid if it is UTF-16 - if (count > 0 && - (0 != PL_strcmp("UTF-16", (char*)(aBytes+encStart)))) { - oCharset.Assign((char*)(aBytes+encStart),count); - oCharsetSource = kCharsetFromMetaTag; - } - encodingFound = PR_TRUE; - break; - } else { - encStart = i+1; - q = qi; - } - } - } - } - } // if (!versionFound) - } // for - } - break; - case 0xEF: - if((0xBB==aBytes[1]) && (0xBF==aBytes[2])) { - // EF BB BF - // Win2K UTF-8 BOM - oCharset.Assign(UTF8); - oCharsetSource= kCharsetFromByteOrderMark; - } - break; - case 0xFE: - if(0xFF==aBytes[1]) { - if(0x00==aBytes[2] && 0x00==aBytes[3]) { - // FE FF 00 00 UCS-4, unusual octet order (3412) - oCharset.Assign(UCS4_3412); - } else { - // FE FF UTF-16, big-endian - oCharset.Assign(UTF16_BE); - } - oCharsetSource= kCharsetFromByteOrderMark; - } - break; - case 0xFF: - if(0xFE==aBytes[1]) { - if(0x00==aBytes[2] && 0x00==aBytes[3]) - // FF FE 00 00 UTF-32, little-endian - oCharset.Assign(UCS4_LE); - else - // FF FE - // UTF-16, little-endian - oCharset.Assign(UTF16_LE); - oCharsetSource= kCharsetFromByteOrderMark; - } - break; - // case 0x4C: if((0x6F==aBytes[1]) && ((0xA7==aBytes[2] && (0x94==aBytes[3])) { - // We do not care EBCIDIC here.... - // } - // break; - } // switch - return !oCharset.IsEmpty(); -} - -inline const char GetNextChar(nsACString::const_iterator& aStart, - nsACString::const_iterator& aEnd) -{ - NS_ASSERTION(aStart != aEnd, "end of buffer"); - return (++aStart != aEnd) ? 
*aStart : '\0'; -} - -PRBool -nsParser::DetectMetaTag(const char* aBytes, - PRInt32 aLen, - nsCString& aCharset, - PRInt32& aCharsetSource) -{ - aCharsetSource= kCharsetFromMetaTag; - aCharset.SetLength(0); - - // XXX Only look inside HTML documents for now. For XML - // documents we should be looking inside the XMLDecl. - if (!mParserContext->mMimeType.Equals(NS_LITERAL_CSTRING(kHTMLTextContentType))) { - return PR_FALSE; - } - - // Fast and loose parsing to determine if we have a complete - // META tag in this block, looking upto 2k into it. - const nsASingleFragmentCString& str = - Substring(aBytes, aBytes + PR_MIN(aLen, 2048)); - // XXXldb Should be const_char_iterator when FindInReadable supports it. - nsACString::const_iterator begin, end; - - str.BeginReading(begin); - str.EndReading(end); - nsACString::const_iterator currPos(begin); - nsACString::const_iterator tokEnd; - nsACString::const_iterator tagEnd(begin); - - while (currPos != end) { - if (!FindCharInReadable('<', currPos, end)) - break; // no tag found in this buffer - - if (GetNextChar(currPos, end) == '!' && - GetNextChar(currPos, end) == '-' && - GetNextChar(currPos, end) == '-') { - // Found MDO ( <!-- ). Now search for MDC ( --[*s]> ) - PRBool foundMDC = PR_FALSE; - PRBool foundMatch = PR_FALSE; - while (!foundMDC) { - if (GetNextChar(currPos, end) == '-' && - GetNextChar(currPos, end) == '-') { - foundMatch = !foundMatch; // toggle until we've matching "--" - } - else if (currPos == end) { - return PR_FALSE; // Couldn't find --[*s]> in this buffer - } - else if (foundMatch && *currPos == '>') { - foundMDC = PR_TRUE; // found comment end delimiter. - ++currPos; - } - } - continue; // continue searching for META tag. 
- } - - // Find the end of the tag, break if incomplete - tagEnd = currPos; - if (!FindCharInReadable('>', tagEnd, end)) - break; - - // If this is not a META tag, continue to next loop - if ( (*currPos != 'm' && *currPos != 'M') || - (*(++currPos) != 'e' && *currPos != 'E') || - (*(++currPos) != 't' && *currPos != 'T') || - (*(++currPos) != 'a' && *currPos != 'A') ) { - currPos = tagEnd; - continue; - } - - // If could not find "charset" in this tag, skip this tag and try next - tokEnd = tagEnd; - if (!CaseInsensitiveFindInReadable(NS_LITERAL_CSTRING("CHARSET"), currPos, tokEnd)) { - currPos = tagEnd; - continue; - } - currPos = tokEnd; - - // skip spaces before '=' - while (*currPos == kSpace || *currPos == kNewLine || - *currPos == kCR || *currPos == kTab) - ++currPos; - // skip '=' - if (*currPos != '=') { - currPos = tagEnd; - continue; - } - ++currPos; - // skip spaces after '=' - while (*currPos == kSpace || *currPos == kNewLine || - *currPos == kCR || *currPos == kTab) - ++currPos; - - // skip open quote - if ((*currPos == '\'' || *currPos == '\"')) - ++currPos; - - // find the end of charset string - tokEnd = currPos; - while (*tokEnd != '\'' && *tokEnd != '\"' && tokEnd != tagEnd) - ++tokEnd; - - // return true if we successfully got something for charset - if (currPos != tokEnd) { - aCharset.Assign(currPos.get(), tokEnd.get() - currPos.get()); - return PR_TRUE; - } - - //nothing specified as charset, continue next loop - currPos = tagEnd; - } - - return PR_FALSE; -} - -typedef struct { - PRBool mNeedCharsetCheck; - nsParser* mParser; - nsIParserFilter* mParserFilter; - nsScanner* mScanner; - nsIRequest* mRequest; -} ParserWriteStruct; - -/* - * This function is invoked as a result of a call to a stream's - * ReadSegments() method. It is called for each contiguous buffer - * of data in the underlying stream or pipe. Using ReadSegments - * allows us to avoid copying data to read out of the stream. 
- */ -static NS_METHOD -ParserWriteFunc(nsIInputStream* in, - void* closure, - const char* fromRawSegment, - PRUint32 toOffset, - PRUint32 count, - PRUint32 *writeCount) -{ - nsresult result; - ParserWriteStruct* pws = NS_STATIC_CAST(ParserWriteStruct*, closure); - const char* buf = fromRawSegment; - PRUint32 theNumRead = count; - - if (!pws) { - return NS_ERROR_FAILURE; - } - - if(pws->mNeedCharsetCheck) { - PRInt32 guessSource; - nsCAutoString guess; - nsCAutoString preferred; - - pws->mNeedCharsetCheck = PR_FALSE; - if(pws->mParser->DetectMetaTag(buf, theNumRead, - guess, guessSource) || - ((count >= 4) && - DetectByteOrderMark((const unsigned char*)buf, - theNumRead, guess, guessSource))) { - nsCOMPtr<nsICharsetAlias> alias(do_GetService(NS_CHARSETALIAS_CONTRACTID)); - result = alias->GetPreferred(guess, preferred); - // Only continue if it's a recognized charset and not - // one of a designated set that we ignore. - if (NS_SUCCEEDED(result) && - ((kCharsetFromByteOrderMark == guessSource) || - (!preferred.Equals(NS_LITERAL_CSTRING("UTF-16")) && - !preferred.Equals(NS_LITERAL_CSTRING("UTF-16BE")) && - !preferred.Equals(NS_LITERAL_CSTRING("UTF-16LE")) && - !preferred.Equals(NS_LITERAL_CSTRING("UTF-32BE")) && - !preferred.Equals(NS_LITERAL_CSTRING("UTF-32LE"))))) { - guess = preferred; - pws->mParser->SetDocumentCharset(guess, guessSource); - pws->mParser->SetSinkCharset(preferred); - nsCOMPtr<nsICachingChannel> channel(do_QueryInterface(pws->mRequest)); - if (channel) { - nsCOMPtr<nsISupports> cacheToken; - channel->GetCacheToken(getter_AddRefs(cacheToken)); - if (cacheToken) { - nsCOMPtr<nsICacheEntryDescriptor> cacheDescriptor(do_QueryInterface(cacheToken)); - if (cacheDescriptor) { -#ifdef DEBUG - nsresult rv = -#endif - cacheDescriptor->SetMetaDataElement("charset", - guess.get()); - NS_ASSERTION(NS_SUCCEEDED(rv),"cannot SetMetaDataElement"); - } - } - } - } - } - } - - if(pws->mParserFilter) - pws->mParserFilter->RawBuffer(buf, &theNumRead); - - result = 
pws->mScanner->Append(buf, theNumRead); - if (NS_SUCCEEDED(result)) { - *writeCount = count; - } - - return result; -} - -/** - * - * - * @update gess 1/4/99 - * @param pIStream contains the input chars - * @param length is the number of bytes waiting input - * @return error code (usually 0) - */ - -nsresult nsParser::OnDataAvailable(nsIRequest *request, nsISupports* aContext, - nsIInputStream *pIStream, PRUint32 sourceOffset, PRUint32 aLength) -{ - - -NS_PRECONDITION((eOnStart == mParserContext->mStreamListenerState || - eOnDataAvail == mParserContext->mStreamListenerState), - "Error: OnStartRequest() must be called before OnDataAvailable()"); - - nsresult result=NS_OK; - - CParserContext *theContext=mParserContext; - - while(theContext) { - if(theContext->mRequest!=request && theContext->mPrevContext) - theContext=theContext->mPrevContext; - else break; - } - - if(theContext && theContext->mRequest==request) { - - theContext->mStreamListenerState=eOnDataAvail; - - if(eInvalidDetect==theContext->mAutoDetectStatus) { - if(theContext->mScanner) { - nsScannerIterator iter; - theContext->mScanner->EndReading(iter); - theContext->mScanner->SetPosition(iter, PR_TRUE); - } - } - - PRUint32 totalRead; - ParserWriteStruct pws; - pws.mNeedCharsetCheck = - ((0 == sourceOffset) && (mCharsetSource<kCharsetFromMetaTag)); - pws.mParser = this; - pws.mParserFilter = mParserFilter; - pws.mScanner = theContext->mScanner; - pws.mRequest = request; - - result = pIStream->ReadSegments(ParserWriteFunc, (void*)&pws, aLength, &totalRead); - if (NS_FAILED(result)) { - return result; - } - - result=ResumeParse(); - } - - return result; -} - -/** - * This is called by the networking library once the last block of data - * has been collected from the net. 
- * - * @update gess 04/01/99 - * @param - * @return - */ -nsresult nsParser::OnStopRequest(nsIRequest *request, nsISupports* aContext, - nsresult status) -{ - - nsresult result=NS_OK; - - if(eOnStart==mParserContext->mStreamListenerState) { - nsAutoString temp; - - //If you're here, then OnDataAvailable() never got called. - //Prior to necko, we never dealt with this case, but the problem may have existed. - //What we'll do (for now at least) is construct a blank HTML document. - if (!mParserContext->mMimeType.Equals(NS_LITERAL_CSTRING(kPlainTextContentType))) - { - temp.Assign(NS_LITERAL_STRING("<html><body></body></html>")); - } - // XXX: until bug #108067 has been fixed we must ensure that *something* - // is in the scanner! so, for now just put in a single space. - else { - temp.Assign(NS_LITERAL_STRING(" ")); - } - mParserContext->mScanner->Append(temp); - result=ResumeParse(PR_TRUE,PR_TRUE); - } - - mParserContext->mStreamListenerState=eOnStop; - mStreamStatus=status; - - if(mParserFilter) - mParserFilter->Finish(); - - mParserContext->mScanner->SetIncremental(PR_FALSE); - result=ResumeParse(PR_TRUE,PR_TRUE); - - // If the parser isn't enabled, we don't finish parsing till - // it is reenabled. - - - // XXX Should we wait to notify our observers as well if the - // parser isn't yet enabled? - if (mObserver) { - mObserver->OnStopRequest(request, aContext, status); - } - -#ifdef rickgdebug - if(gOutFile){ - gOutFile->close(); - delete gOutFile; - gOutFile=0; - } -#endif - - return result; -} - - -/******************************************************************* - Here comes the tokenization methods... - *******************************************************************/ - - -/** - * Part of the code sandwich, this gets called right before - * the tokenization process begins. The main reason for - * this call is to allow the delegate to do initialization. 
- * - * @update gess 01/04/99 - * @param - * @return TRUE if it's ok to proceed - */ -PRBool nsParser::WillTokenize(PRBool aIsFinalChunk){ - nsITokenizer* theTokenizer=0; - nsresult result = NS_OK; - if (mParserContext) { - PRInt32 type = mParserContext->mDTD ? mParserContext->mDTD->GetType() : NS_IPARSER_FLAG_HTML; - mParserContext->GetTokenizer(type, theTokenizer); - } - - if (theTokenizer) { - result = theTokenizer->WillTokenize(aIsFinalChunk,&mTokenAllocator); - } - return result; -} - - -/** - * This is the primary control routine to consume tokens. - * It iteratively consumes tokens until an error occurs or - * you run out of data. - * - * @update gess 01/04/99 - * @return error code -- 0 if ok, non-zero if error. - */ -nsresult nsParser::Tokenize(PRBool aIsFinalChunk){ - - nsITokenizer* theTokenizer = 0; - - nsresult result = NS_OK; - - if (mParserContext) { - PRInt32 type = mParserContext->mDTD ? mParserContext->mDTD->GetType() : NS_IPARSER_FLAG_HTML; - mParserContext->GetTokenizer(type, theTokenizer); - } - - if (theTokenizer) { - if (mFlags & NS_PARSER_FLAG_FLUSH_TOKENS) { - // For some reason tokens didn't get flushed ( probably - // the parser got blocked before all the tokens in the - // stack got handled ). Flush 'em now. Ref. bug 104856 - if (theTokenizer->GetCount() == 0) { - mFlags &= ~NS_PARSER_FLAG_FLUSH_TOKENS; // reset since the tokens have been flushed. - // Resume tokenization for the rest of the document - // since all the tokens in the tokenizer got flushed. 
- result = Tokenize(aIsFinalChunk); - } - } - else { - PRBool flushTokens=PR_FALSE; - - MOZ_TIMER_START(mTokenizeTime); - - WillTokenize(aIsFinalChunk); - while (NS_SUCCEEDED(result)) { - mParserContext->mScanner->Mark(); - result=theTokenizer->ConsumeToken(*mParserContext->mScanner, flushTokens); - if (NS_FAILED(result)) { - mParserContext->mScanner->RewindToMark(); - if (kEOF == result){ - break; - } - else if(NS_ERROR_HTMLPARSER_STOPPARSING==result) { - result = Terminate(); - break; - } - } - else if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) { - // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix Bug# 23931. - // Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 -- - // Also remember to update the marked position. - mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS; - mParserContext->mScanner->Mark(); - break; - } - } - DidTokenize(aIsFinalChunk); - - MOZ_TIMER_STOP(mTokenizeTime); - } - } - else{ - result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER; - } - - return result; -} - -/** - * This is the tail-end of the code sandwich for the - * tokenization process. It gets called once tokenziation - * has completed for each phase. - * - * @update gess 01/04/99 - * @param - * @return TRUE if all went well - */ -PRBool nsParser::DidTokenize(PRBool aIsFinalChunk){ - PRBool result=PR_TRUE; - - nsITokenizer* theTokenizer=0; - nsresult rv = NS_OK; - if (mParserContext) { - PRInt32 type = mParserContext->mDTD ? 
mParserContext->mDTD->GetType() : NS_IPARSER_FLAG_HTML; - mParserContext->GetTokenizer(type, theTokenizer); - } - - if (NS_SUCCEEDED(rv) && theTokenizer) { - result = theTokenizer->DidTokenize(aIsFinalChunk); - } - return result; -} - -/** - * Get the channel associated with this parser - * @update harishd,gagan 07/17/01 - * @param aChannel out param that will contain the result - * @return NS_OK if successful - */ -NS_IMETHODIMP -nsParser::GetChannel(nsIChannel** aChannel) -{ - nsresult result = NS_ERROR_NOT_AVAILABLE; - if (mParserContext && mParserContext->mRequest) - result = CallQueryInterface(mParserContext->mRequest, aChannel); - return result; -} - -/** - * Get the DTD associated with this parser - * @update vidur 9/29/99 - * @param aDTD out param that will contain the result - * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error - */ -NS_IMETHODIMP -nsParser::GetDTD(nsIDTD** aDTD) -{ - if (mParserContext) { - *aDTD = mParserContext->mDTD; - NS_IF_ADDREF(mParserContext->mDTD); - } - - return NS_OK; -} - diff --git a/htmlparser/src/nsParser.h b/htmlparser/src/nsParser.h deleted file mode 100644 index ee99b20038b6..000000000000 --- a/htmlparser/src/nsParser.h +++ /dev/null @@ -1,468 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. 
- * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -/** - * MODULE NOTES: - * @update gess 4/1/98 - * - * This class does two primary jobs: - * 1) It iterates the tokens provided during the - * tokenization process, identifing where elements - * begin and end (doing validation and normalization). - * 2) It controls and coordinates with an instance of - * the IContentSink interface, to coordinate the - * the production of the content model. - * - * The basic operation of this class assumes that an HTML - * document is non-normalized. Therefore, we don't process - * the document in a normalized way. Don't bother to look - * for methods like: doHead() or doBody(). 
- * - * Instead, in order to be backward compatible, we must - * scan the set of tokens and perform this basic set of - * operations: - * 1) Determine the token type (easy, since the tokens know) - * 2) Determine the appropriate section of the HTML document - * each token belongs in (HTML,HEAD,BODY,FRAMESET). - * 3) Insert content into our document (via the sink) into - * the correct section. - * 4) In the case of tags that belong in the BODY, we must - * ensure that our underlying document state reflects - * the appropriate context for our tag. - * - * For example,if we see a <TR>, we must ensure our - * document contains a table into which the row can - * be placed. This may result in "implicit containers" - * created to ensure a well-formed document. - * - */ - -#ifndef NS_PARSER__ -#define NS_PARSER__ - -#include "nsIParser.h" -#include "nsDeque.h" -#include "nsParserNode.h" -#include "nsIURL.h" -#include "CParserContext.h" -#include "nsParserCIID.h" -#include "nsITokenizer.h" -#include "nsHTMLTags.h" -#include "nsDTDUtils.h" -#include "nsTimer.h" -#include "nsIEventQueue.h" -#include "nsIContentSink.h" -#include "nsIParserFilter.h" - -class nsIDTD; -class nsScanner; -class nsIProgressEventSink; - -#ifdef _MSC_VER -#pragma warning( disable : 4275 ) -#endif - - -class nsParser : public nsIParser, - public nsIStreamListener{ - - - public: - friend class CTokenHandler; - static void FreeSharedObjects(void); - - NS_DECL_ISUPPORTS - - - /** - * default constructor - * @update gess5/11/98 - */ - nsParser(); - - - /** - * Destructor - * @update gess5/11/98 - */ - virtual ~nsParser(); - - /** - * Select given content sink into parser for parser output - * @update gess5/11/98 - * @param aSink is the new sink to be used by parser - * @return old sink, or NULL - */ - NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink); - - /** - * retrive the sink set into the parser - * @update gess5/11/98 - * @param aSink is the new sink to be used by parser - * @return old sink, or 
NULL - */ - NS_IMETHOD_(nsIContentSink*) GetContentSink(void); - - /** - * Call this method once you've created a parser, and want to instruct it - * about the command which caused the parser to be constructed. For example, - * this allows us to select a DTD which can do, say, view-source. - * - * @update gess 3/25/98 - * @param aCommand -- ptrs to string that contains command - * @return nada - */ - NS_IMETHOD_(void) GetCommand(nsString& aCommand); - NS_IMETHOD_(void) SetCommand(const char* aCommand); - NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand); - - /** - * Call this method once you've created a parser, and want to instruct it - * about what charset to load - * - * @update ftang 4/23/99 - * @param aCharset- the charset of a document - * @param aCharsetSource- the source of the charset - * @return nada - */ - NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, PRInt32 aSource); - - NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, PRInt32& aSource) - { - aCharset = mCharset; - aSource = mCharsetSource; - } - - - NS_IMETHOD_(void) SetParserFilter(nsIParserFilter* aFilter); - - NS_IMETHOD RegisterDTD(nsIDTD* aDTD); - - /** - * Retrieve the scanner from the topmost parser context - * - * @update gess 6/9/98 - * @return ptr to scanner - */ - NS_IMETHOD_(nsDTDMode) GetParseMode(void); - - /** - * Cause parser to parse input from given URL - * @update gess5/11/98 - * @param aURL is a descriptor for source document - * @param aListener is a listener to forward notifications to - * @return TRUE if all went well -- FALSE otherwise - */ - NS_IMETHOD Parse(nsIURI* aURL, - nsIRequestObserver* aListener = nsnull, - PRBool aEnableVerify = PR_FALSE, - void* aKey = 0, - nsDTDMode aMode = eDTDMode_autodetect); - - /** - * Cause parser to parse input from given stream - * @update gess5/11/98 - * @param aStream is the i/o source - * @return TRUE if all went well -- FALSE otherwise - */ - NS_IMETHOD Parse(nsIInputStream* aStream, - const 
nsACString& aMimeType, - PRBool aEnableVerify = PR_FALSE, - void* aKey = 0, - nsDTDMode aMode = eDTDMode_autodetect); - - /** - * @update gess5/11/98 - * @param anHTMLString contains a string-full of real HTML - * @param appendTokens tells us whether we should insert tokens inline, or append them. - * @return TRUE if all went well -- FALSE otherwise - */ - NS_IMETHOD Parse(const nsAString& aSourceBuffer, - void* aKey, - const nsACString& aContentType, - PRBool aEnableVerify, - PRBool aLastCall, - nsDTDMode aMode = eDTDMode_autodetect); - - NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer, - void* aKey, - nsVoidArray& aTagStack, - PRUint32 anInsertPos, - const nsACString& aContentType, - nsDTDMode aMode = eDTDMode_autodetect); - - - /** - * This method gets called when the tokens have been consumed, and it's time - * to build the model via the content sink. - * @update gess5/11/98 - * @return YES if model building went well -- NO otherwise. - */ - NS_IMETHOD BuildModel(void); - - /** - * Call this when you want control whether or not the parser will parse - * and tokenize input (TRUE), or whether it just caches input to be - * parsed later (FALSE). - * - * @update gess 9/1/98 - * @param aState determines whether we parse/tokenize or just cache. - * @return current state - */ - NS_IMETHOD ContinueParsing(); - NS_IMETHOD_(void) BlockParser(); - NS_IMETHOD_(void) UnblockParser(); - NS_IMETHOD Terminate(void); - - /** - * Call this to query whether the parser is enabled or not. - * - * @update vidur 4/12/99 - * @return current state - */ - NS_IMETHOD_(PRBool) IsParserEnabled(); - - /** - * Call this to query whether the parser thinks it's done with parsing. - * - * @update rickg 5/12/01 - * @return complete state - */ - NS_IMETHOD_(PRBool) IsComplete(); - - /** - * This rather arcane method (hack) is used as a signal between the - * DTD and the parser. 
It allows the DTD to tell the parser that content - * that comes through (parser::parser(string)) but not consumed should - * propagate into the next string based parse call. - * - * @update gess 9/1/98 - * @param aState determines whether we propagate unused string content. - * @return current state - */ - void SetUnusedInput(nsString& aBuffer); - - /** - * This method gets called (automatically) during incremental parsing - * @update gess5/11/98 - * @return TRUE if all went well, otherwise FALSE - */ - virtual nsresult ResumeParse(PRBool allowIteration = PR_TRUE, - PRBool aIsFinalChunk = PR_FALSE, - PRBool aCanInterrupt = PR_TRUE); - - //********************************************* - // These methods are callback methods used by - // net lib to let us know about our inputstream. - //********************************************* - // nsIRequestObserver methods: - NS_DECL_NSIREQUESTOBSERVER - - // nsIStreamListener methods: - NS_DECL_NSISTREAMLISTENER - - void PushContext(CParserContext& aContext); - CParserContext* PopContext(); - CParserContext* PeekContext() {return mParserContext;} - - /** - * - * @update gess 1/22/99 - * @param - * @return - */ - nsresult GetTokenizer(nsITokenizer*& aTokenizer); - - /** - * Get the channel associated with this parser - * @update harishd,gagan 07/17/01 - * @param aChannel out param that will contain the result - * @return NS_OK if successful - */ - NS_IMETHOD GetChannel(nsIChannel** aChannel); - - /** - * Get the DTD associated with this parser - * @update vidur 9/29/99 - * @param aDTD out param that will contain the result - * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error - */ - NS_IMETHOD GetDTD(nsIDTD** aDTD); - - /** - * Detects the existence of a META tag with charset information in - * the given buffer. 
- */ - PRBool DetectMetaTag(const char* aBytes, - PRInt32 aLen, - nsCString& oCharset, - PRInt32& oCharsetSource); - - void SetSinkCharset(nsACString& aCharset); - - /** - * Removes continue parsing events - * @update kmcclusk 5/18/98 - */ - - NS_IMETHODIMP CancelParsingEvents(); - - /** - * Indicates whether the parser is in a state where it - * can be interrupted. - * @return PR_TRUE if parser can be interrupted, PR_FALSE if it can not be interrupted. - * @update kmcclusk 5/18/98 - */ - PRBool CanInterrupt(void); - - /** - * Set to parser state to indicate whether parsing tokens can be interrupted - * @param aCanInterrupt PR_TRUE if parser can be interrupted, PR_FALSE if it can not be interrupted. - * @update kmcclusk 5/18/98 - */ - void SetCanInterrupt(PRBool aCanInterrupt); - - /** - * This is called when the final chunk has been - * passed to the parser and the content sink has - * interrupted token processing. It schedules - * a ParserContinue PL_Event which will ask the parser - * to HandleParserContinueEvent when it is handled. - * @update kmcclusk6/1/2001 - */ - nsresult PostContinueEvent(); - - /** - * Fired when the continue parse event is triggered. - * @update kmcclusk 5/18/98 - */ - void HandleParserContinueEvent(void); - -protected: - - /** - * - * @update gess5/18/98 - * @param - * @return - */ - nsresult WillBuildModel(nsString& aFilename); - - /** - * - * @update gess5/18/98 - * @param - * @return - */ - nsresult DidBuildModel(nsresult anErrorCode); - -private: - - /******************************************* - These are the tokenization methods... - *******************************************/ - - /** - * Part of the code sandwich, this gets called right before - * the tokenization process begins. The main reason for - * this call is to allow the delegate to do initialization. 
- * - * @update gess 3/25/98 - * @param - * @return TRUE if it's ok to proceed - */ - PRBool WillTokenize(PRBool aIsFinalChunk = PR_FALSE); - - - /** - * This is the primary control routine. It iteratively - * consumes tokens until an error occurs or you run out - * of data. - * - * @update gess 3/25/98 - * @return error code - */ - nsresult Tokenize(PRBool aIsFinalChunk = PR_FALSE); - - /** - * This is the tail-end of the code sandwich for the - * tokenization process. It gets called once tokenziation - * has completed. - * - * @update gess 3/25/98 - * @param - * @return TRUE if all went well - */ - PRBool DidTokenize(PRBool aIsFinalChunk = PR_FALSE); - - -protected: - //********************************************* - // And now, some data members... - //********************************************* - - - nsCOMPtr<nsIEventQueue> mEventQueue; - CParserContext* mParserContext; - nsCOMPtr<nsIRequestObserver> mObserver; - nsCOMPtr<nsIContentSink> mSink; - - nsCOMPtr<nsIParserFilter> mParserFilter; - nsTokenAllocator mTokenAllocator; - - eParserCommands mCommand; - nsresult mInternalState; - PRInt32 mStreamStatus; - PRInt32 mCharsetSource; - - PRUint16 mFlags; - - nsString mUnusedInput; - nsCString mCharset; - nsString mCommandStr; - - - -public: - - MOZ_TIMER_DECLARE(mParseTime) - MOZ_TIMER_DECLARE(mDTDTime) - MOZ_TIMER_DECLARE(mTokenizeTime) -}; - -#endif - diff --git a/htmlparser/src/nsParserModule.cpp b/htmlparser/src/nsParserModule.cpp deleted file mode 100644 index cfb5d010a958..000000000000 --- a/htmlparser/src/nsParserModule.cpp +++ /dev/null @@ -1,136 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Pierre Phaneuf <pp@ludusdesign.com> - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -#include "nsIAtom.h" -#include "nsString.h" -#include "nspr.h" -#include "nsCOMPtr.h" -#include "nsIGenericFactory.h" -#include "nsIModule.h" -#include "nsParserCIID.h" -#include "nsParser.h" -#include "CNavDTD.h" -#include "COtherDTD.h" -#include "nsHTMLEntities.h" -#include "nsHTMLTokenizer.h" -//#include "nsTextTokenizer.h" -#include "nsElementTable.h" -#include "nsParserService.h" - -#ifdef MOZ_VIEW_SOURCE -#include "nsViewSourceHTML.h" -#endif - -#ifdef NS_DEBUG -#include "nsLoggingSink.h" -#include "nsExpatDriver.h" -#endif - -//---------------------------------------------------------------------- - -#ifdef NS_DEBUG -NS_GENERIC_FACTORY_CONSTRUCTOR(nsLoggingSink) -NS_GENERIC_FACTORY_CONSTRUCTOR(nsExpatDriver) -#endif - -NS_GENERIC_FACTORY_CONSTRUCTOR(nsParser) -NS_GENERIC_FACTORY_CONSTRUCTOR(CNavDTD) -NS_GENERIC_FACTORY_CONSTRUCTOR(CTransitionalDTD) -NS_GENERIC_FACTORY_CONSTRUCTOR(nsParserService) - -#ifdef MOZ_VIEW_SOURCE -NS_GENERIC_FACTORY_CONSTRUCTOR(CViewSourceHTML) -#endif - -static const nsModuleComponentInfo gComponents[] = { - -#ifdef NS_DEBUG - { "Logging sink", NS_LOGGING_SINK_CID, NULL, nsLoggingSinkConstructor }, - { "Expat Driver", NS_EXPAT_DRIVER_CID, NULL, nsExpatDriverConstructor }, -#endif - - { "Parser", NS_PARSER_CID, NULL, nsParserConstructor }, - { "Navigator HTML DTD", NS_CNAVDTD_CID, NULL, CNavDTDConstructor }, - { "Transitional DTD", NS_CTRANSITIONAL_DTD_CID, NULL, - CTransitionalDTDConstructor }, -#ifdef MOZ_VIEW_SOURCE - { "ViewSource DTD", NS_VIEWSOURCE_DTD_CID, NULL, CViewSourceHTMLConstructor }, -#endif - { "ParserService", - NS_PARSERSERVICE_CID, - NS_PARSER_CONTRACTID_PREFIX "/parser-service;1", - nsParserServiceConstructor - } -}; - -static PRBool gInitialized = PR_FALSE; - -PR_STATIC_CALLBACK(nsresult) -Initialize(nsIModule* aSelf) -{ - if (!gInitialized) { - nsresult rv = nsHTMLTags::AddRefTable(); - NS_ENSURE_SUCCESS(rv, rv); - - rv = nsHTMLEntities::AddRefTable(); - if 
(NS_FAILED(rv)) { - nsHTMLTags::ReleaseTable(); - return rv; - } - InitializeElementTable(); - CNewlineToken::AllocNewline(); - gInitialized = PR_TRUE; - } - return NS_OK; -} - -PR_STATIC_CALLBACK(void) -Shutdown(nsIModule* aSelf) -{ - if (gInitialized) { - nsHTMLTags::ReleaseTable(); - nsHTMLEntities::ReleaseTable(); - nsDTDContext::ReleaseGlobalObjects(); - nsParser::FreeSharedObjects(); - DeleteElementTable(); - CNewlineToken::FreeNewline(); - gInitialized = PR_FALSE; - } -} - -NS_IMPL_NSGETMODULE_WITH_CTOR_DTOR(nsParserModule, gComponents, Initialize, Shutdown) diff --git a/htmlparser/src/nsParserMsgUtils.cpp b/htmlparser/src/nsParserMsgUtils.cpp deleted file mode 100644 index 4c3b91ff689d..000000000000 --- a/htmlparser/src/nsParserMsgUtils.cpp +++ /dev/null @@ -1,101 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "nsIServiceManager.h" -#include "nsIStringBundle.h" -#include "nsXPIDLString.h" -#include "nsParserMsgUtils.h" -#include "nsNetCID.h" - -static NS_DEFINE_CID(kStringBundleServiceCID, NS_STRINGBUNDLESERVICE_CID); - -// This code is derived from nsFormControlHelper::GetLocalizedString() - -static nsresult GetBundle(const char * aPropFileName, nsIStringBundle **aBundle) -{ - NS_ENSURE_ARG_POINTER(aPropFileName); - NS_ENSURE_ARG_POINTER(aBundle); - - // Create a bundle for the localization - nsresult rv; - - nsCOMPtr<nsIStringBundleService> stringService = - do_GetService(kStringBundleServiceCID, &rv); - if (NS_SUCCEEDED(rv)) - rv = stringService->CreateBundle(aPropFileName, aBundle); - - return rv; -} - -nsresult -nsParserMsgUtils::GetLocalizedStringByName(const char * aPropFileName, const char* aKey, nsString& oVal) -{ - oVal.Truncate(); - - NS_ENSURE_ARG_POINTER(aKey); - - nsCOMPtr<nsIStringBundle> bundle; - nsresult rv = GetBundle(aPropFileName,getter_AddRefs(bundle)); - if (NS_SUCCEEDED(rv) && bundle) { - nsXPIDLString valUni; - nsAutoString key; 
key.AssignWithConversion(aKey); - rv = bundle->GetStringFromName(key.get(), getter_Copies(valUni)); - if (NS_SUCCEEDED(rv) && valUni) { - oVal.Assign(valUni); - } - } - - return rv; -} - -nsresult -nsParserMsgUtils::GetLocalizedStringByID(const char * aPropFileName, PRUint32 aID, nsString& oVal) -{ - oVal.Truncate(); - - nsCOMPtr<nsIStringBundle> bundle; - nsresult rv = GetBundle(aPropFileName,getter_AddRefs(bundle)); - if (NS_SUCCEEDED(rv) && bundle) { - nsXPIDLString valUni; - rv = bundle->GetStringFromID(aID, getter_Copies(valUni)); - if (NS_SUCCEEDED(rv) && valUni) { - oVal.Assign(valUni); - } - } - - return rv; -} diff --git a/htmlparser/src/nsParserMsgUtils.h b/htmlparser/src/nsParserMsgUtils.h deleted file mode 100644 index be3203c4d5fb..000000000000 --- a/htmlparser/src/nsParserMsgUtils.h +++ /dev/null @@ -1,53 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef nsParserMsgUtils_h -#define nsParserMsgUtils_h - -#include "nsString.h" - -#define XMLPARSER_PROPERTIES "chrome://communicator/locale/layout/xmlparser.properties" - -class nsParserMsgUtils { - nsParserMsgUtils(); // Currently this is not meant to be created, use the static methods - ~nsParserMsgUtils(); // If perf required, change this to cache values etc. 
-public: - static nsresult GetLocalizedStringByName(const char * aPropFileName, const char* aKey, nsString& aVal); - static nsresult GetLocalizedStringByID(const char * aPropFileName, PRUint32 aID, nsString& aVal); -}; - -#endif diff --git a/htmlparser/src/nsParserNode.cpp b/htmlparser/src/nsParserNode.cpp deleted file mode 100644 index cb7f02a94851..000000000000 --- a/htmlparser/src/nsParserNode.cpp +++ /dev/null @@ -1,387 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - - -#include "nsIAtom.h" -#include "nsParserNode.h" -#include <string.h> -#include "nsHTMLTokens.h" -#include "nsITokenizer.h" -#include "nsDTDUtils.h" - - -/** - * Default Constructor - */ -nsCParserNode::nsCParserNode() - : mRefCnt(0), mGenericState(PR_FALSE), mUseCount(0), mToken(nsnull), - mTokenAllocator(nsnull) -{ - MOZ_COUNT_CTOR(nsCParserNode); -#ifdef HEAP_ALLOCATED_NODES - mNodeAllocator = nsnull; -#endif -} - -/** - * Constructor - * - * @update gess 3/25/98 - * @param aToken -- token to init internal token - * @return - */ -nsCParserNode::nsCParserNode(CToken* aToken, - nsTokenAllocator* aTokenAllocator, - nsNodeAllocator* aNodeAllocator) - : mRefCnt(0), mGenericState(PR_FALSE), mUseCount(0), mToken(aToken), - mTokenAllocator(aTokenAllocator) -{ - MOZ_COUNT_CTOR(nsCParserNode); - - static int theNodeCount = 0; - ++theNodeCount; - IF_HOLD(mToken); - -#ifdef HEAP_ALLOCATED_NODES - mNodeAllocator = aNodeAllocator; -#endif -} - -/** - * destructor - * NOTE: We intentionally DONT recycle mToken here. 
- * It may get cached for use elsewhere - * @update gess 3/25/98 - * @param - * @return - */ -nsCParserNode::~nsCParserNode() { - MOZ_COUNT_DTOR(nsCParserNode); - ReleaseAll(); -#ifdef HEAP_ALLOCATED_NODES - if(mNodeAllocator) { - mNodeAllocator->Recycle(this); - } - mNodeAllocator = nsnull; -#endif - mTokenAllocator = 0; -} - - -/** - * Init - * - * @update gess 3/25/98 - * @param - * @return - */ - -nsresult -nsCParserNode::Init(CToken* aToken, - nsTokenAllocator* aTokenAllocator, - nsNodeAllocator* aNodeAllocator) -{ - mTokenAllocator = aTokenAllocator; - mToken = aToken; - IF_HOLD(mToken); - mGenericState = PR_FALSE; - mUseCount=0; -#ifdef HEAP_ALLOCATED_NODES - mNodeAllocator = aNodeAllocator; -#endif - return NS_OK; -} - -void -nsCParserNode::AddAttribute(CToken* aToken) -{ -} - - -/** - * Gets the name of this node. Currently unused. - * - * @update gess 3/25/98 - * @param - * @return string ref containing node name - */ -const nsAString& -nsCParserNode::GetTagName() const { - return EmptyString(); -} - - -/** - * Get text value of this node, which translates into - * getting the text value of the underlying token - * - * @update gess 3/25/98 - * @param - * @return string ref of text from internal token - */ -const nsAString& -nsCParserNode::GetText() const -{ - if (mToken) { - return mToken->GetStringValue(); - } - return EmptyString(); -} - -/** - * Get node type, meaning, get the tag type of the - * underlying token - * - * @update gess 3/25/98 - * @param - * @return int value that represents tag type - */ -PRInt32 -nsCParserNode::GetNodeType(void) const -{ - return (mToken) ? mToken->GetTypeID() : 0; -} - - -/** - * Gets the token type, which corresponds to a value from - * eHTMLTokens_xxx. - * - * @update gess 3/25/98 - * @param - * @return - */ -PRInt32 -nsCParserNode::GetTokenType(void) const -{ - return (mToken) ? 
mToken->GetTokenType() : 0; -} - - -/** - * Retrieve the number of attributes on this node - * - * @update gess 3/25/98 - * @param - * @return int -- representing attribute count - */ -PRInt32 -nsCParserNode::GetAttributeCount(PRBool askToken) const -{ - return 0; -} - -/** - * Retrieve the string rep of the attribute key at the - * given index. - * - * @update gess 3/25/98 - * @param anIndex-- offset of attribute to retrieve - * @return string rep of given attribute text key - */ -const nsAString& -nsCParserNode::GetKeyAt(PRUint32 anIndex) const -{ - return EmptyString(); -} - - -/** - * Retrieve the string rep of the attribute at given offset - * - * @update gess 3/25/98 - * @param anIndex-- offset of attribute to retrieve - * @return string rep of given attribute text value - */ -const nsAString& -nsCParserNode::GetValueAt(PRUint32 anIndex) const -{ - return EmptyString(); -} - -PRInt32 -nsCParserNode::TranslateToUnicodeStr(nsString& aString) const -{ - if (eToken_entity == mToken->GetTokenType()) { - return ((CEntityToken*)mToken)->TranslateToUnicodeStr(aString); - } - return -1; -} - -/** - * This getter retrieves the line number from the input source where - * the token occured. Lines are interpreted as occuring between \n characters. - * @update gess7/24/98 - * @return int containing the line number the token was found on - */ -PRInt32 -nsCParserNode::GetSourceLineNumber(void) const { - return mToken ? mToken->GetLineNumber() : 0; -} - -/** - * This method pop the attribute token - * @update harishd 03/25/99 - * @return token at anIndex - */ - -CToken* -nsCParserNode::PopAttributeToken() { - return 0; -} - -/** Retrieve a string containing the tag and its attributes in "source" form - * @update rickg 06June2000 - * @return void - */ -void -nsCParserNode::GetSource(nsString& aString) -{ - eHTMLTags theTag = mToken ? 
(eHTMLTags)mToken->GetTypeID() : eHTMLTag_unknown; - aString.Assign(PRUnichar('<')); - const PRUnichar* theTagName = nsHTMLTags::GetStringValue(theTag); - if(theTagName) { - aString.Append(theTagName); - } - aString.Append(PRUnichar('>')); -} - -/** Release all the objects you're holding to. - * @update harishd 08/02/00 - * @return void - */ -nsresult -nsCParserNode::ReleaseAll() -{ - if(mTokenAllocator) { - IF_FREE(mToken,mTokenAllocator); - } - return NS_OK; -} - -nsresult -nsCParserStartNode::Init(CToken* aToken, - nsTokenAllocator* aTokenAllocator, - nsNodeAllocator* aNodeAllocator) -{ - NS_ASSERTION(mAttributes.GetSize() == 0, "attributes not recycled!"); - return nsCParserNode::Init(aToken, aTokenAllocator, aNodeAllocator); -} - -void nsCParserStartNode::AddAttribute(CToken* aToken) -{ - NS_ASSERTION(0 != aToken, "Error: Token shouldn't be null!"); - mAttributes.Push(aToken); -} - -PRInt32 -nsCParserStartNode::GetAttributeCount(PRBool askToken) const -{ - PRInt32 result = 0; - if (askToken) { - result = mToken ? 
mToken->GetAttributeCount() : 0; - } - else { - result = mAttributes.GetSize(); - } - return result; -} - -const nsAString& -nsCParserStartNode::GetKeyAt(PRUint32 anIndex) const -{ - if ((PRInt32)anIndex < mAttributes.GetSize()) { - CAttributeToken* attr = - NS_STATIC_CAST(CAttributeToken*, mAttributes.ObjectAt(anIndex)); - if (attr) { - return attr->GetKey(); - } - } - return EmptyString(); -} - -const nsAString& -nsCParserStartNode::GetValueAt(PRUint32 anIndex) const -{ - if (PRInt32(anIndex) < mAttributes.GetSize()) { - CAttributeToken* attr = - NS_STATIC_CAST(CAttributeToken*, mAttributes.ObjectAt(anIndex)); - if (attr) { - return attr->GetValue(); - } - } - return EmptyString(); -} - -CToken* -nsCParserStartNode::PopAttributeToken() -{ - return NS_STATIC_CAST(CToken*, mAttributes.Pop()); -} - -void nsCParserStartNode::GetSource(nsString& aString) -{ - aString.Assign(PRUnichar('<')); - const PRUnichar* theTagName = - nsHTMLTags::GetStringValue(nsHTMLTag(mToken->GetTypeID())); - if (theTagName) { - aString.Append(theTagName); - } - PRInt32 index; - PRInt32 size = mAttributes.GetSize(); - for (index = 0 ; index < size; ++index) { - CAttributeToken *theToken = - NS_STATIC_CAST(CAttributeToken*, mAttributes.ObjectAt(index)); - if (theToken) { - theToken->AppendSourceTo(aString); - aString.Append(PRUnichar(' ')); //this will get removed... 
- } - } - aString.Append(PRUnichar('>')); -} - -nsresult nsCParserStartNode::ReleaseAll() -{ - NS_ASSERTION(0!=mTokenAllocator, "Error: no token allocator"); - CToken* theAttrToken; - while ((theAttrToken = NS_STATIC_CAST(CToken*, mAttributes.Pop()))) { - IF_FREE(theAttrToken, mTokenAllocator); - } - nsCParserNode::ReleaseAll(); - return NS_OK; -} - diff --git a/htmlparser/src/nsParserNode.h b/htmlparser/src/nsParserNode.h deleted file mode 100644 index e0eb0989b4bb..000000000000 --- a/htmlparser/src/nsParserNode.h +++ /dev/null @@ -1,321 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - - -/** - * MODULE NOTES: - * @update gess 4/1/98 - * - * This class is defines the basic interface between the - * parser and the content sink. The parser will iterate - * over the collection of tokens that it sees from the - * tokenizer, coverting each related "group" into one of - * these. This object gets passed to the sink, and is - * then immediately reused. - * - * If you want to hang onto one of these, you should - * make your own copy. - * - */ - -#ifndef NS_PARSERNODE__ -#define NS_PARSERNODE__ - -#include "nsIParserNode.h" -#include "nsToken.h" -#include "nsString.h" -#include "nsParserCIID.h" -#include "nsDeque.h" -#include "nsDTDUtils.h" - -class nsTokenAllocator; - -class nsCParserNode : public nsIParserNode { - - protected: - - PRInt32 mRefCnt; - - public: - - void AddRef() - { - ++mRefCnt; - } - - void Release(nsFixedSizeAllocator& aPool) - { - if (--mRefCnt == 0) - Destroy(this, aPool); - } - -#ifndef HEAP_ALLOCATED_NODES - protected: - - /** - * Hide operator new; clients should use Create() instead. - */ - static void* operator new(size_t) CPP_THROW_NEW { return 0; } - - /** - * Hide operator delete; clients should use Destroy() instead. 
- */ - static void operator delete(void*,size_t) {} - -#endif - - public: - static nsCParserNode* Create(CToken* aToken, - nsTokenAllocator* aTokenAllocator, - nsNodeAllocator* aNodeAllocator) - { -#ifdef HEAP_ALLOCATED_NODES - return new -#else - nsFixedSizeAllocator& pool = aNodeAllocator->GetArenaPool(); - void* place = pool.Alloc(sizeof(nsCParserNode)); - return ::new (place) -#endif - nsCParserNode(aToken, aTokenAllocator, aNodeAllocator); - } - - static void Destroy(nsCParserNode* aNode, nsFixedSizeAllocator& aPool) - { -#ifdef HEAP_ALLOCATED_NODES - delete aNode; -#else - aNode->~nsCParserNode(); - aPool.Free(aNode, sizeof(*aNode)); -#endif - } - - /** - * Default constructor - */ - nsCParserNode(); - - /** - * Constructor - * @update gess5/11/98 - * @param aToken is the token this node "refers" to - */ - nsCParserNode(CToken* aToken, - nsTokenAllocator* aTokenAllocator, - nsNodeAllocator* aNodeAllocator=0); - - /** - * Destructor - * @update gess5/11/98 - */ - virtual ~nsCParserNode(); - - /** - * Init - * @update gess5/11/98 - */ - virtual nsresult Init(CToken* aToken, - nsTokenAllocator* aTokenAllocator, - nsNodeAllocator* aNodeAllocator=0); - - /** - * Retrieve the name of the node - * @update gess5/11/98 - * @return string containing node name - */ - virtual const nsAString& GetTagName() const; - - /** - * Retrieve the text from the given node - * @update gess5/11/98 - * @return string containing node text - */ - virtual const nsAString& GetText() const; - - /** - * Retrieve the type of the parser node. - * @update gess5/11/98 - * @return node type. - */ - virtual PRInt32 GetNodeType() const; - - /** - * Retrieve token type of parser node - * @update gess5/11/98 - * @return token type - */ - virtual PRInt32 GetTokenType() const; - - - //*************************************** - //methods for accessing key/value pairs - //*************************************** - - /** - * Retrieve the number of attributes in this node. 
- * @update gess5/11/98 - * @return count of attributes (may be 0) - */ - virtual PRInt32 GetAttributeCount(PRBool askToken=PR_FALSE) const; - - /** - * Retrieve the key (of key/value pair) at given index - * @update gess5/11/98 - * @param anIndex is the index of the key you want - * @return string containing key. - */ - virtual const nsAString& GetKeyAt(PRUint32 anIndex) const; - - /** - * Retrieve the value (of key/value pair) at given index - * @update gess5/11/98 - * @param anIndex is the index of the value you want - * @return string containing value. - */ - virtual const nsAString& GetValueAt(PRUint32 anIndex) const; - - /** - * NOTE: When the node is an entity, this will translate the entity - * to it's unicode value, and store it in aString. - * @update gess5/11/98 - * @param aString will contain the resulting unicode string value - * @return int (unicode char or unicode index from table) - */ - virtual PRInt32 TranslateToUnicodeStr(nsString& aString) const; - - /** - * - * @update gess5/11/98 - * @param - * @return - */ - virtual void AddAttribute(CToken* aToken); - - /** - * This getter retrieves the line number from the input source where - * the token occured. Lines are interpreted as occuring between \n characters. - * @update gess7/24/98 - * @return int containing the line number the token was found on - */ - virtual PRInt32 GetSourceLineNumber(void) const; - - /** This method pop the attribute token from the given index - * @update harishd 03/25/99 - * @return token at anIndex - */ - virtual CToken* PopAttributeToken(); - - /** Retrieve a string containing the tag and its attributes in "source" form - * @update rickg 06June2000 - * @return void - */ - virtual void GetSource(nsString& aString); - - /** - * This pair of methods allows us to set a generic bit (for arbitrary use) - * on each node stored in the context. 
- * @update gess 11May2000 - */ - virtual PRBool GetGenericState(void) const {return mGenericState;} - virtual void SetGenericState(PRBool aState) {mGenericState=aState;} - - /** Release all the objects you're holding - * @update harishd 08/02/00 - * @return void - */ - virtual nsresult ReleaseAll(); - - PRPackedBool mGenericState; - PRInt32 mUseCount; - CToken* mToken; - - nsTokenAllocator* mTokenAllocator; -#ifdef HEAP_ALLOCATED_NODES - nsNodeAllocator* mNodeAllocator; // weak -#endif -}; - - -class nsCParserStartNode : public nsCParserNode -{ -public: - static nsCParserNode* Create(CToken* aToken, - nsTokenAllocator* aTokenAllocator, - nsNodeAllocator* aNodeAllocator) - { -#ifdef HEAP_ALLOCATED_NODES - return new -#else - nsFixedSizeAllocator& pool = aNodeAllocator->GetArenaPool(); - void* place = pool.Alloc(sizeof(nsCParserStartNode)); - return ::new (place) -#endif - nsCParserStartNode(aToken, aTokenAllocator, aNodeAllocator); - } - - nsCParserStartNode() - : nsCParserNode(), mAttributes(0) { } - - nsCParserStartNode(CToken* aToken, - nsTokenAllocator* aTokenAllocator, - nsNodeAllocator* aNodeAllocator = 0) - : nsCParserNode(aToken, aTokenAllocator, aNodeAllocator), mAttributes(0) { } - - virtual ~nsCParserStartNode() - { - NS_ASSERTION(0 != mTokenAllocator, "Error: no token allocator"); - CToken* theAttrToken = 0; - while ((theAttrToken = NS_STATIC_CAST(CToken*, mAttributes.Pop()))) { - IF_FREE(theAttrToken, mTokenAllocator); - } - } - - virtual nsresult Init(CToken* aToken, - nsTokenAllocator* aTokenAllocator, - nsNodeAllocator* aNodeAllocator = 0); - virtual void AddAttribute(CToken* aToken); - virtual PRInt32 GetAttributeCount(PRBool askToken = PR_FALSE) const; - virtual const nsAString& GetKeyAt(PRUint32 anIndex) const; - virtual const nsAString& GetValueAt(PRUint32 anIndex) const; - virtual CToken* PopAttributeToken(); - virtual void GetSource(nsString& aString); - virtual nsresult ReleaseAll(); -protected: - nsDeque mAttributes; -}; - -#endif - diff 
--git a/htmlparser/src/nsParserService.cpp b/htmlparser/src/nsParserService.cpp deleted file mode 100644 index f3579724158a..000000000000 --- a/htmlparser/src/nsParserService.cpp +++ /dev/null @@ -1,274 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- - * - * ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. 
If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "nsDOMError.h" -#include "nsIAtom.h" -#include "nsParserService.h" -#include "nsHTMLEntities.h" -#include "nsElementTable.h" -#include "nsICategoryManager.h" -#include "nsCategoryManagerUtils.h" - -extern "C" int MOZ_XMLCheckQName(const char* ptr, const char* end, - int ns_aware, const char** colon); - -nsParserService::nsParserService() : mEntries(0) -{ - mHaveNotifiedCategoryObservers = PR_FALSE; -} - -nsParserService::~nsParserService() -{ - nsObserverEntry *entry = nsnull; - while( (entry = NS_STATIC_CAST(nsObserverEntry*,mEntries.Pop())) ) { - NS_RELEASE(entry); - } -} - -NS_IMPL_ISUPPORTS1(nsParserService, nsIParserService) - -NS_IMETHODIMP -nsParserService::HTMLAtomTagToId(nsIAtom* aAtom, PRInt32* aId) const -{ - nsAutoString tagName; - aAtom->ToString(tagName); - - *aId = nsHTMLTags::LookupTag(tagName); - - return NS_OK; -} - -NS_IMETHODIMP -nsParserService::HTMLCaseSensitiveAtomTagToId(nsIAtom* aAtom, - PRInt32* aId) const -{ - nsAutoString tagName; - aAtom->ToString(tagName); - - *aId = nsHTMLTags::CaseSensitiveLookupTag(tagName.get()); - - return NS_OK; -} - -NS_IMETHODIMP -nsParserService::HTMLStringTagToId(const nsAString &aTagName, - PRInt32* aId) const -{ - *aId = nsHTMLTags::LookupTag(aTagName); - - return NS_OK; -} - -NS_IMETHODIMP -nsParserService::HTMLIdToStringTag(PRInt32 aId, - const PRUnichar **aTagName) const -{ - *aTagName = nsHTMLTags::GetStringValue((nsHTMLTag)aId); - - return NS_OK; -} - -NS_IMETHODIMP -nsParserService::HTMLConvertEntityToUnicode(const nsAString& aEntity, - PRInt32* aUnicode) const -{ - *aUnicode = nsHTMLEntities::EntityToUnicode(aEntity); - - return NS_OK; -} - -NS_IMETHODIMP -nsParserService::HTMLConvertUnicodeToEntity(PRInt32 aUnicode, - nsCString& aEntity) const -{ - const char* str = 
nsHTMLEntities::UnicodeToEntity(aUnicode); - if (str) { - aEntity.Assign(str); - } - - return NS_OK; -} - -NS_IMETHODIMP -nsParserService::IsContainer(PRInt32 aId, PRBool& aIsContainer) const -{ - aIsContainer = nsHTMLElement::IsContainer((eHTMLTags)aId); - - return NS_OK; -} - -NS_IMETHODIMP -nsParserService::IsBlock(PRInt32 aId, PRBool& aIsBlock) const -{ - if((aId>eHTMLTag_unknown) && (aId<eHTMLTag_userdefined)) { - aIsBlock=((gHTMLElements[aId].IsMemberOf(kBlock)) || - (gHTMLElements[aId].IsMemberOf(kBlockEntity)) || - (gHTMLElements[aId].IsMemberOf(kHeading)) || - (gHTMLElements[aId].IsMemberOf(kPreformatted))|| - (gHTMLElements[aId].IsMemberOf(kList))); - } - else { - aIsBlock = PR_FALSE; - } - - return NS_OK; -} - -NS_IMETHODIMP -nsParserService::RegisterObserver(nsIElementObserver* aObserver, - const nsAString& aTopic, - const eHTMLTags* aTags) -{ - nsresult result = NS_OK; - nsObserverEntry* entry = GetEntry(aTopic); - - if(!entry) { - result = CreateEntry(aTopic,&entry); - NS_ENSURE_SUCCESS(result,result); - } - - while (*aTags) { - if(*aTags != eHTMLTag_userdefined && *aTags <= NS_HTML_TAG_MAX) { - entry->AddObserver(aObserver,*aTags); - } - ++aTags; - } - - return result; -} - -NS_IMETHODIMP -nsParserService::UnregisterObserver(nsIElementObserver* aObserver, - const nsAString& aTopic) -{ - PRInt32 count = mEntries.GetSize(); - - for (PRInt32 i=0; i < count; ++i) { - nsObserverEntry* entry = NS_STATIC_CAST(nsObserverEntry*,mEntries.ObjectAt(i)); - if (entry && entry->Matches(aTopic)) { - entry->RemoveObserver(aObserver); - } - } - - return NS_OK; -} - -NS_IMETHODIMP -nsParserService::GetTopicObservers(const nsAString& aTopic, - nsIObserverEntry** aEntry) { - nsresult result = NS_OK; - nsObserverEntry* entry = GetEntry(aTopic); - - if (!entry) { - return NS_ERROR_NULL_POINTER; - } - - NS_ADDREF(*aEntry = entry); - - return result; -} - -nsresult -nsParserService::CheckQName(const nsASingleFragmentString& aQName, - PRBool aNamespaceAware, - const 
PRUnichar** aColon) -{ - const char* colon; - const PRUnichar *begin, *end; - aQName.BeginReading(begin); - aQName.EndReading(end); - int result = MOZ_XMLCheckQName(NS_REINTERPRET_CAST(const char*, begin), - NS_REINTERPRET_CAST(const char*, end), - aNamespaceAware, &colon); - *aColon = NS_REINTERPRET_CAST(const PRUnichar*, colon); - - if (result == 0) { - return NS_OK; - } - - // MOZ_EXPAT_EMPTY_QNAME || MOZ_EXPAT_INVALID_CHARACTER - if (result & (1 << 0) || result & (1 << 1)) { - return NS_ERROR_DOM_INVALID_CHARACTER_ERR; - } - - return NS_ERROR_DOM_NAMESPACE_ERR; -} - -class nsMatchesTopic : public nsDequeFunctor{ - const nsAString& mString; -public: - PRBool matched; - nsObserverEntry* entry; - nsMatchesTopic(const nsAString& aString):mString(aString),matched(PR_FALSE){}; - virtual void* operator()(void* anObject){ - entry=NS_STATIC_CAST(nsObserverEntry*, anObject); - matched=mString.Equals(entry->mTopic); - return matched ? nsnull : anObject; - }; -}; - -// XXX This may be more efficient as a HashTable instead of linear search -nsObserverEntry* -nsParserService::GetEntry(const nsAString& aTopic) -{ - if (!mHaveNotifiedCategoryObservers) { - mHaveNotifiedCategoryObservers = PR_TRUE; - NS_CreateServicesFromCategory("parser-service-category", - NS_STATIC_CAST(nsISupports*,NS_STATIC_CAST(void*,this)), - "parser-service-start"); - } - - nsMatchesTopic matchesTopic(aTopic); - mEntries.FirstThat(*&matchesTopic); - return matchesTopic.matched?matchesTopic.entry:nsnull; -} - -nsresult -nsParserService::CreateEntry(const nsAString& aTopic, nsObserverEntry** aEntry) -{ - *aEntry = new nsObserverEntry(aTopic); - - if (!aEntry) { - return NS_ERROR_OUT_OF_MEMORY; - } - - NS_ADDREF(*aEntry); - mEntries.Push(*aEntry); - - return NS_OK; -} diff --git a/htmlparser/src/nsParserService.h b/htmlparser/src/nsParserService.h deleted file mode 100644 index 730dbdd0b07e..000000000000 --- a/htmlparser/src/nsParserService.h +++ /dev/null @@ -1,102 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; 
indent-tabs-mode: nil; c-basic-offset: 2 -*- - * - * ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ -#ifndef NS_PARSERSERVICE_H__ -#define NS_PARSERSERVICE_H__ - -#include "nsIParserService.h" -#include "nsDTDUtils.h" -#include "nsVoidArray.h" - -extern "C" int MOZ_XMLIsLetter(const char* ptr); -extern "C" int MOZ_XMLIsNCNameChar(const char* ptr); - -class nsParserService : public nsIParserService { -public: - nsParserService(); - virtual ~nsParserService(); - - NS_DECL_ISUPPORTS - - NS_IMETHOD HTMLAtomTagToId(nsIAtom* aAtom, PRInt32* aId) const; - - NS_IMETHOD HTMLCaseSensitiveAtomTagToId(nsIAtom* aAtom, PRInt32* aId) const; - - NS_IMETHOD HTMLStringTagToId(const nsAString &aTagName, - PRInt32* aId) const; - - NS_IMETHOD HTMLIdToStringTag(PRInt32 aId, const PRUnichar **aTagName) const; - - NS_IMETHOD HTMLConvertEntityToUnicode(const nsAString& aEntity, - PRInt32* aUnicode) const; - NS_IMETHOD HTMLConvertUnicodeToEntity(PRInt32 aUnicode, - nsCString& aEntity) const; - NS_IMETHOD IsContainer(PRInt32 aId, PRBool& aIsContainer) const; - NS_IMETHOD IsBlock(PRInt32 aId, PRBool& aIsBlock) const; - - // Observer mechanism - NS_IMETHOD RegisterObserver(nsIElementObserver* aObserver, - const nsAString& aTopic, - const eHTMLTags* aTags = nsnull); - - NS_IMETHOD UnregisterObserver(nsIElementObserver* aObserver, - const nsAString& aTopic); - NS_IMETHOD GetTopicObservers(const nsAString& aTopic, - nsIObserverEntry** aEntry); - - nsresult CheckQName(const nsASingleFragmentString& aQName, - PRBool aNamespaceAware, const PRUnichar** aColon); - - PRBool IsXMLLetter(PRUnichar aChar) - { - return MOZ_XMLIsLetter(NS_REINTERPRET_CAST(const char*, &aChar)); - } - PRBool IsXMLNCNameChar(PRUnichar aChar) - { - return MOZ_XMLIsNCNameChar(NS_REINTERPRET_CAST(const char*, &aChar)); - } - -protected: - nsObserverEntry* GetEntry(const nsAString& aTopic); - nsresult CreateEntry(const nsAString& aTopic, - nsObserverEntry** aEntry); - - nsDeque mEntries; //each topic holds a list of observers per tag. 
- PRBool mHaveNotifiedCategoryObservers; -}; - -#endif diff --git a/htmlparser/src/nsScanner.cpp b/htmlparser/src/nsScanner.cpp deleted file mode 100644 index 1e0430749d48..000000000000 --- a/htmlparser/src/nsScanner.cpp +++ /dev/null @@ -1,1371 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. 
If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -//#define __INCREMENTAL 1 - -#include "nsScanner.h" -#include "nsDebug.h" -#include "nsIServiceManager.h" -#include "nsICharsetConverterManager.h" -#include "nsICharsetAlias.h" -#include "nsReadableUtils.h" -#include "nsIInputStream.h" -#include "nsILocalFile.h" -#include "nsNetUtil.h" -#include "nsUTF8Utils.h" // for LossyConvertEncoding - -static NS_DEFINE_CID(kCharsetAliasCID, NS_CHARSETALIAS_CID); - -nsReadEndCondition::nsReadEndCondition(const PRUnichar* aTerminateChars) : - mChars(aTerminateChars), mFilter(PRUnichar(~0)) // All bits set -{ - // Build filter that will be used to filter out characters with - // bits that none of the terminal chars have. This works very well - // because terminal chars often have only the last 4-6 bits set and - // normal ascii letters have bit 7 set. Other letters have even higher - // bits set. - - // Calculate filter - const PRUnichar *current = aTerminateChars; - PRUnichar terminalChar = *current; - while (terminalChar) { - mFilter &= ~terminalChar; - ++current; - terminalChar = *current; - } -} - -static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID); - -static const char kBadHTMLText[] ="<H3>Oops...</H3>You just tried to read a non-existent document: <BR>"; -static const char kUnorderedStringError[] = "String argument must be ordered. Don't you read API's?"; - -#ifdef __INCREMENTAL -const int kBufsize=1; -#else -const int kBufsize=64; -#endif - -MOZ_DECL_CTOR_COUNTER(nsScanner) - -/** - * Use this constructor if you want i/o to be based on - * a single string you hand in during construction. - * This short cut was added for Javascript. 
- * - * @update gess 5/12/98 - * @param aMode represents the parser mode (nav, other) - * @return - */ -nsScanner::nsScanner(const nsAString& anHTMLString, const nsACString& aCharset, PRInt32 aSource) -{ - MOZ_COUNT_CTOR(nsScanner); - - mTotalRead = anHTMLString.Length(); - mSlidingBuffer = nsnull; - mCountRemaining = 0; - AppendToBuffer(anHTMLString); - mSlidingBuffer->BeginReading(mCurrentPosition); - mMarkPosition = mCurrentPosition; - mIncremental=PR_FALSE; - mUnicodeDecoder = 0; - mCharsetSource = kCharsetUninitialized; - SetDocumentCharset(aCharset, aSource); -} - -/** - * Use this constructor if you want i/o to be based on strings - * the scanner receives. If you pass a null filename, you - * can still provide data to the scanner via append. - * - * @update gess 5/12/98 - * @param aFilename -- - * @return - */ -nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream, const nsACString& aCharset, PRInt32 aSource) : - mFilename(aFilename) -{ - MOZ_COUNT_CTOR(nsScanner); - - mSlidingBuffer = nsnull; - - // XXX This is a big hack. We need to initialize the iterators to something. - // What matters is that mCurrentPosition == mEndPosition, so that our methods - // believe that we are at EOF (see bug 182067). We null out mCurrentPosition - // so that we have some hope of catching null pointer dereferences associated - // with this hack. --darin - memset(&mCurrentPosition, 0, sizeof(mCurrentPosition)); - mMarkPosition = mCurrentPosition; - mEndPosition = mCurrentPosition; - - mIncremental=PR_TRUE; - mCountRemaining = 0; - mTotalRead=0; - - if(aCreateStream) { - nsCOMPtr<nsILocalFile> file; - nsCOMPtr<nsIInputStream> fileStream; - - NS_NewLocalFile(aFilename, PR_TRUE, getter_AddRefs(file)); - if (file) - NS_NewLocalFileInputStream(getter_AddRefs(mInputStream), file); - - } //if - mUnicodeDecoder = 0; - mCharsetSource = kCharsetUninitialized; - SetDocumentCharset(aCharset, aSource); -} - -/** - * Use this constructor if you want i/o to be stream based. 
- * - * @update gess 5/12/98 - * @param aStream -- - * @param assumeOwnership -- - * @param aFilename -- - * @return - */ -nsScanner::nsScanner(const nsAString& aFilename,nsIInputStream* aStream,const nsACString& aCharset, PRInt32 aSource) : - mFilename(aFilename) -{ - MOZ_COUNT_CTOR(nsScanner); - - mSlidingBuffer = nsnull; - - // XXX This is a big hack. We need to initialize the iterators to something. - // What matters is that mCurrentPosition == mEndPosition, so that our methods - // believe that we are at EOF (see bug 182067). We null out mCurrentPosition - // so that we have some hope of catching null pointer dereferences associated - // with this hack. --darin - memset(&mCurrentPosition, 0, sizeof(mCurrentPosition)); - mMarkPosition = mCurrentPosition; - mEndPosition = mCurrentPosition; - - mIncremental=PR_FALSE; - mCountRemaining = 0; - mTotalRead=0; - mInputStream=aStream; - mUnicodeDecoder = 0; - mCharsetSource = kCharsetUninitialized; - SetDocumentCharset(aCharset, aSource); -} - - -nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , PRInt32 aSource) { - - nsresult res = NS_OK; - - if( aSource < mCharsetSource) // priority is lower the the current one , just - return res; - - nsCOMPtr<nsICharsetAlias> calias(do_GetService(kCharsetAliasCID, &res)); - NS_ASSERTION( nsnull != calias, "cannot find charset alias"); - if( NS_SUCCEEDED(res) && (nsnull != calias)) - { - PRBool same = PR_FALSE; - res = calias->Equals(aCharset, mCharset, &same); - if(NS_SUCCEEDED(res) && same) - { - return NS_OK; // no difference, don't change it - } - // different, need to change it - nsCAutoString charsetName; - res = calias->GetPreferred(aCharset, charsetName); - - if(NS_FAILED(res) && (kCharsetUninitialized == mCharsetSource) ) - { - // failed - unknown alias , fallback to ISO-8859-1 - charsetName.Assign(NS_LITERAL_CSTRING("ISO-8859-1")); - } - mCharset = charsetName; - mCharsetSource = aSource; - - nsCOMPtr<nsICharsetConverterManager> ccm = - 
do_GetService(kCharsetConverterManagerCID, &res); - if(NS_SUCCEEDED(res) && (nsnull != ccm)) - { - nsIUnicodeDecoder * decoder = nsnull; - res = ccm->GetUnicodeDecoderRaw(mCharset.get(), &decoder); - if(NS_SUCCEEDED(res) && (nsnull != decoder)) - { - NS_IF_RELEASE(mUnicodeDecoder); - - mUnicodeDecoder = decoder; - } - } - } - return res; -} - - -/** - * default destructor - * - * @update gess 3/25/98 - * @param - * @return - */ -nsScanner::~nsScanner() { - - if (mSlidingBuffer) { - delete mSlidingBuffer; - } - - MOZ_COUNT_DTOR(nsScanner); - - if(mInputStream) { - mInputStream->Close(); - mInputStream = 0; - } - - NS_IF_RELEASE(mUnicodeDecoder); -} - -/** - * Resets current offset position of input stream to marked position. - * This allows us to back up to this point if the need should arise, - * such as when tokenization gets interrupted. - * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST! - * - * @update gess 5/12/98 - * @param - * @return - */ -void nsScanner::RewindToMark(void){ - mCountRemaining += (Distance(mMarkPosition, mCurrentPosition)); - mCurrentPosition = mMarkPosition; -} - - -/** - * Records current offset position in input stream. This allows us - * to back up to this point if the need should arise, such as when - * tokenization gets interrupted. - * - * @update gess 7/29/98 - * @param - * @return - */ -void nsScanner::Mark() { - if (mSlidingBuffer) { - mSlidingBuffer->DiscardPrefix(mCurrentPosition); - mSlidingBuffer->BeginReading(mCurrentPosition); - mMarkPosition = mCurrentPosition; - } -} - - -/** - * Insert data to our underlying input buffer as - * if it were read from an input stream. 
- * - * @update harishd 01/12/99 - * @return error code - */ -PRBool nsScanner::UngetReadable(const nsAString& aBuffer) { - - mSlidingBuffer->UngetReadable(aBuffer,mCurrentPosition); - mSlidingBuffer->BeginReading(mCurrentPosition); // Insertion invalidated our iterators - mSlidingBuffer->EndReading(mEndPosition); - - PRUint32 length = aBuffer.Length(); - mCountRemaining += length; // Ref. bug 117441 - mTotalRead += length; - return PR_TRUE; -} - -/** - * Append data to our underlying input buffer as - * if it were read from an input stream. - * - * @update gess4/3/98 - * @return error code - */ -nsresult nsScanner::Append(const nsAString& aBuffer) { - - mTotalRead += aBuffer.Length(); - AppendToBuffer(aBuffer); - return NS_OK; -} - -/** - * - * - * @update gess 5/21/98 - * @param - * @return - */ -nsresult nsScanner::Append(const char* aBuffer, PRUint32 aLen){ - nsresult res=NS_OK; - PRUnichar *unichars, *start; - if(mUnicodeDecoder) { - PRInt32 unicharBufLen = 0; - mUnicodeDecoder->GetMaxLength(aBuffer, aLen, &unicharBufLen); - nsScannerString::Buffer* buffer = nsScannerString::AllocBuffer(unicharBufLen + 1); - NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY); - start = unichars = buffer->DataStart(); - - PRInt32 totalChars = 0; - PRInt32 unicharLength = unicharBufLen; - do { - PRInt32 srcLength = aLen; - res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength); - - totalChars += unicharLength; - // Continuation of failure case - if(NS_FAILED(res)) { - // if we failed, we consume one byte, replace it with U+FFFD - // and try the conversion again. 
- unichars[unicharLength++] = (PRUnichar)0xFFFD; - unichars = unichars + unicharLength; - unicharLength = unicharBufLen - (++totalChars); - - mUnicodeDecoder->Reset(); - - if(((PRUint32) (srcLength + 1)) > aLen) { - srcLength = aLen; - } - else { - ++srcLength; - } - - aBuffer += srcLength; - aLen -= srcLength; - } - } while (NS_FAILED(res) && (aLen > 0)); - - buffer->SetDataLength(totalChars); - AppendToBuffer(buffer); - mTotalRead += totalChars; - - // Don't propagate return code of unicode decoder - // since it doesn't reflect on our success or failure - // - Ref. bug 87110 - res = NS_OK; - } - else { - AppendASCIItoBuffer(aBuffer, aLen); - mTotalRead+=aLen; - } - - return res; -} - - -/** - * Grab data from underlying stream. - * - * @update gess4/3/98 - * @return error code - */ -nsresult nsScanner::FillBuffer(void) { - nsresult result=NS_OK; - - if(!mInputStream) { -#if 0 - //This is DEBUG code!!!!!! XXX DEBUG XXX - //If you're here, it means someone tried to load a - //non-existent document. So as a favor, we emit a - //little bit of HTML explaining the error. - if(0==mTotalRead) { - mBuffer.Append((const char*)kBadHTMLText); - mBuffer.Append(mFilename); - mTotalRead+=mBuffer.Length(); - } - else -#endif - result=kEOF; - } - else { - PRUint32 numread=0; - char buf[kBufsize+1]; - buf[kBufsize]=0; - - // XXX use ReadSegments to avoid extra buffer copy? 
--darin - - result = mInputStream->Read(buf, kBufsize, &numread); - if (0 == numread) { - return kEOF; - } - - if((0<numread) && (0==result)) { - AppendASCIItoBuffer(buf, numread); - } - mTotalRead+=numread; - } - - return result; -} - -/** - * determine if the scanner has reached EOF - * - * @update gess 5/12/98 - * @param - * @return 0=!eof 1=eof - */ -nsresult nsScanner::Eof() { - nsresult theError=NS_OK; - - if (!mSlidingBuffer) { - return kEOF; - } - - theError=FillBuffer(); - - if(NS_OK==theError) { - if (0==(PRUint32)mSlidingBuffer->Length()) { - return kEOF; - } - } - - return theError; -} - -/** - * retrieve next char from scanners internal input stream - * - * @update gess 3/25/98 - * @param - * @return error code reflecting read status - */ -nsresult nsScanner::GetChar(PRUnichar& aChar) { - nsresult result=NS_OK; - aChar=0; - - if (!mSlidingBuffer) { - return kEOF; - } - - if (mCurrentPosition == mEndPosition) { - result=Eof(); - } - - if(NS_OK == result){ - aChar=*mCurrentPosition++; - --mCountRemaining; - } - return result; -} - - -/** - * peek ahead to consume next char from scanner's internal - * input buffer - * - * @update gess 3/25/98 - * @param - * @return - */ -nsresult nsScanner::Peek(PRUnichar& aChar, PRUint32 aOffset) { - nsresult result=NS_OK; - aChar=0; - - if (!mSlidingBuffer) { - return kEOF; - } - - if (mCurrentPosition == mEndPosition) { - result=Eof(); - } - - if(NS_OK == result){ - if (aOffset) { - while ((NS_OK == result) && (mCountRemaining <= aOffset)) { - result = Eof(); - } - - if (NS_OK == result) { - nsScannerIterator pos = mCurrentPosition; - pos.advance(aOffset); - aChar=*pos; - } - } - else { - aChar=*mCurrentPosition; - } - } - - return result; -} - -nsresult nsScanner::Peek(nsAString& aStr, PRInt32 aNumChars) -{ - if (!mSlidingBuffer) { - return kEOF; - } - - if (mCurrentPosition == mEndPosition) { - return Eof(); - } - - nsScannerIterator start, end; - - start = mCurrentPosition; - - if (mCountRemaining < 
PRUint32(aNumChars)) { - end = mEndPosition; - } - else { - end = start; - end.advance(aNumChars); - } - - CopyUnicodeTo(start, end, aStr); - - return NS_OK; -} - - -/** - * Skip whitespace on scanner input stream - * - * @update gess 3/25/98 - * @param - * @return error status - */ -nsresult nsScanner::SkipWhitespace(PRInt32& aNewlinesSkipped) { - - if (!mSlidingBuffer) { - return kEOF; - } - - PRUnichar theChar = 0; - nsresult result = Peek(theChar); - - if (result == kEOF) { - // XXX why wouldn't Eof() return kEOF?? --darin - return Eof(); - } - - nsScannerIterator current = mCurrentPosition; - PRBool done = PR_FALSE; - PRBool skipped = PR_FALSE; - - while (!done && current != mEndPosition) { - switch(theChar) { - case '\n': - case '\r': ++aNewlinesSkipped; - case ' ' : - case '\b': - case '\t': - { - skipped = PR_TRUE; - PRUnichar thePrevChar = theChar; - theChar = (++current != mEndPosition) ? *current : '\0'; - if ((thePrevChar == '\r' && theChar == '\n') || - (thePrevChar == '\n' && theChar == '\r')) { - theChar = (++current != mEndPosition) ? *current : '\0'; // CRLF == LFCR => LF - } - } - break; - default: - done = PR_TRUE; - break; - } - } - - if (skipped) { - SetPosition(current); - if (current == mEndPosition) { - result = Eof(); - } - } - - return result; -} - -/** - * Skip over chars as long as they equal given char - * - * @update gess 3/25/98 - * @param - * @return error code - */ -nsresult nsScanner::SkipOver(PRUnichar aSkipChar){ - - if (!mSlidingBuffer) { - return kEOF; - } - - PRUnichar ch=0; - nsresult result=NS_OK; - - while(NS_OK==result) { - result=Peek(ch); - if(NS_OK == result) { - if(ch!=aSkipChar) { - break; - } - GetChar(ch); - } - else break; - } //while - return result; - -} - -/** - * Skip over chars as long as they're in aSkipSet - * - * @update gess 3/25/98 - * @param aSkipSet is an ordered string. 
- * @return error code - */ -nsresult nsScanner::SkipOver(nsString& aSkipSet){ - - if (!mSlidingBuffer) { - return kEOF; - } - - PRUnichar theChar=0; - nsresult result=NS_OK; - - while(NS_OK==result) { - result=Peek(theChar); - if(NS_OK == result) { - PRInt32 pos=aSkipSet.FindChar(theChar); - if(kNotFound==pos) { - break; - } - GetChar(theChar); - } - else break; - } //while - return result; - -} - - -/** - * Skip over chars until they're in aValidSet - * - * @update gess 3/25/98 - * @param aValid set is an ordered string that - * contains chars you're looking for - * @return error code - */ -nsresult nsScanner::SkipTo(nsString& aValidSet){ - if (!mSlidingBuffer) { - return kEOF; - } - - PRUnichar ch=0; - nsresult result=NS_OK; - - while(NS_OK==result) { - result=Peek(ch); - if(NS_OK == result) { - PRInt32 pos=aValidSet.FindChar(ch); - if(kNotFound!=pos) { - break; - } - GetChar(ch); - } - else break; - } //while - return result; -} - -#if 0 -void DoErrTest(nsString& aString) { - PRInt32 pos=aString.FindChar(0); - if(kNotFound<pos) { - if(aString.Length()-1!=pos) { - } - } -} - -void DoErrTest(nsCString& aString) { - PRInt32 pos=aString.FindChar(0); - if(kNotFound<pos) { - if(aString.Length()-1!=pos) { - } - } -} -#endif - -/** - * Skip over chars as long as they're in aValidSet - * - * @update gess 3/25/98 - * @param aValidSet is an ordered string containing the - * characters you want to skip - * @return error code - */ -nsresult nsScanner::SkipPast(nsString& aValidSet){ - NS_NOTYETIMPLEMENTED("Error: SkipPast not yet implemented."); - return NS_OK; -} - -/** - * Consume characters until you run into space, a '<', a '>', or a '/'. 
- * - * @param aString - receives new data from stream - * @return error code - */ -nsresult nsScanner::ReadTagIdentifier(nsString& aString) { - - if (!mSlidingBuffer) { - return kEOF; - } - - PRUnichar theChar=0; - nsresult result=Peek(theChar); - nsScannerIterator current, end; - PRBool found=PR_FALSE; - - current = mCurrentPosition; - end = mEndPosition; - - while(current != end) { - - theChar=*current; - if(theChar) { - found = PR_TRUE; - switch(theChar) { - case '\n': - case '\r': - case ' ' : - case '\b': - case '\t': - case '\v': - case '\f': - case '<': - case '>': - case '/': - found = PR_FALSE; - break; - default: - break; - } - - if(!found) { - // If we the current character isn't a valid character for - // the identifier, we're done. Append the results to - // the string passed in. - AppendUnicodeTo(mCurrentPosition, current, aString); - break; - } - } - ++current; - } - - SetPosition(current); - if (current == end) { - result = Eof(); - } - - //DoErrTest(aString); - - return result; -} - -/** - * Consume characters until you run into a char that's not valid in an - * entity name - * - * @param aString - receives new data from stream - * @return error code - */ -nsresult nsScanner::ReadEntityIdentifier(nsString& aString) { - - if (!mSlidingBuffer) { - return kEOF; - } - - PRUnichar theChar=0; - nsresult result=Peek(theChar); - nsScannerIterator origin, current, end; - PRBool found=PR_FALSE; - - origin = mCurrentPosition; - current = mCurrentPosition; - end = mEndPosition; - - while(current != end) { - - theChar=*current; - if(theChar) { - found=PR_FALSE; - switch(theChar) { - case '_': - case '-': - case '.': - // Don't allow ':' in entity names. 
See bug 23791 - found = PR_TRUE; - break; - default: - found = ('a'<=theChar && theChar<='z') || - ('A'<=theChar && theChar<='Z') || - ('0'<=theChar && theChar<='9'); - break; - } - - if(!found) { - AppendUnicodeTo(mCurrentPosition, current, aString); - break; - } - } - ++current; - } - - SetPosition(current); - if (current == end) { - AppendUnicodeTo(origin, current, aString); - return Eof(); - } - - //DoErrTest(aString); - - return result; -} - -/** - * Consume digits - * - * @param aString - should contain digits - * @return error code - */ -nsresult nsScanner::ReadNumber(nsString& aString,PRInt32 aBase) { - - if (!mSlidingBuffer) { - return kEOF; - } - - NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported"); - - PRUnichar theChar=0; - nsresult result=Peek(theChar); - nsScannerIterator origin, current, end; - - origin = mCurrentPosition; - current = origin; - end = mEndPosition; - - PRBool done = PR_FALSE; - while(current != end) { - theChar=*current; - if(theChar) { - done = (theChar < '0' || theChar > '9') && - ((aBase == 16)? 
(theChar < 'A' || theChar > 'F') && - (theChar < 'a' || theChar > 'f') - :PR_TRUE); - if(done) { - AppendUnicodeTo(origin, current, aString); - break; - } - } - ++current; - } - - SetPosition(current); - if (current == end) { - AppendUnicodeTo(origin, current, aString); - return Eof(); - } - - //DoErrTest(aString); - - return result; -} - -/** - * Consume characters until you find the terminal char - * - * @update gess 3/25/98 - * @param aString receives new data from stream - * @param addTerminal tells us whether to append terminal to aString - * @return error code - */ -nsresult nsScanner::ReadWhitespace(nsString& aString, - PRInt32& aNewlinesSkipped) { - - if (!mSlidingBuffer) { - return kEOF; - } - - PRUnichar theChar = 0; - nsresult result = Peek(theChar); - - if (result == kEOF) { - return Eof(); - } - - nsScannerIterator origin, current, end; - PRBool done = PR_FALSE; - - origin = mCurrentPosition; - current = origin; - end = mEndPosition; - - while(!done && current != end) { - switch(theChar) { - case '\n': - case '\r': - { - ++aNewlinesSkipped; - PRUnichar thePrevChar = theChar; - theChar = (++current != end) ? *current : '\0'; - if ((thePrevChar == '\r' && theChar == '\n') || - (thePrevChar == '\n' && theChar == '\r')) { - theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF - } else if (thePrevChar == '\r') { - // Lone CR becomes CRLF; callers should know to remove extra CRs - AppendUnicodeTo(origin, current, aString); - aString.Append(PRUnichar('\n')); - origin = current; - } - } - break; - case ' ' : - case '\b': - case '\t': - theChar = (++current != end) ? *current : '\0'; - break; - default: - done = PR_TRUE; - AppendUnicodeTo(origin, current, aString); - break; - } - } - - SetPosition(current); - if (current == end) { - AppendUnicodeTo(origin, current, aString); - result = Eof(); - } - - return result; -} - -//XXXbz callers of this have to manage their lone '\r' themselves if they want -//it to work. 
Good thing they're all in view-source and it deals. -nsresult nsScanner::ReadWhitespace(nsScannerIterator& aStart, - nsScannerIterator& aEnd, - PRInt32& aNewlinesSkipped) { - - if (!mSlidingBuffer) { - return kEOF; - } - - PRUnichar theChar = 0; - nsresult result = Peek(theChar); - - if (result == kEOF) { - return Eof(); - } - - nsScannerIterator origin, current, end; - PRBool done = PR_FALSE; - - origin = mCurrentPosition; - current = origin; - end = mEndPosition; - - while(!done && current != end) { - switch(theChar) { - case '\n': - case '\r': ++aNewlinesSkipped; - case ' ' : - case '\b': - case '\t': - { - PRUnichar thePrevChar = theChar; - theChar = (++current != end) ? *current : '\0'; - if ((thePrevChar == '\r' && theChar == '\n') || - (thePrevChar == '\n' && theChar == '\r')) { - theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF - } - } - break; - default: - done = PR_TRUE; - aStart = origin; - aEnd = current; - break; - } - } - - SetPosition(current); - if (current == end) { - aStart = origin; - aEnd = current; - result = Eof(); - } - - return result; -} - -/** - * Consume chars as long as they are <i>in</i> the - * given validSet of input chars. 
- * - * @update gess 3/25/98 - * @param aString will contain the result of this method - * @param aValidSet is an ordered string that contains the - * valid characters - * @return error code - */ -nsresult nsScanner::ReadWhile(nsString& aString, - nsString& aValidSet, - PRBool addTerminal){ - - if (!mSlidingBuffer) { - return kEOF; - } - - PRUnichar theChar=0; - nsresult result=Peek(theChar); - nsScannerIterator origin, current, end; - - origin = mCurrentPosition; - current = origin; - end = mEndPosition; - - while(current != end) { - - theChar=*current; - if(theChar) { - PRInt32 pos=aValidSet.FindChar(theChar); - if(kNotFound==pos) { - if(addTerminal) - ++current; - AppendUnicodeTo(origin, current, aString); - break; - } - } - ++current; - } - - SetPosition(current); - if (current == end) { - AppendUnicodeTo(origin, current, aString); - return Eof(); - } - - //DoErrTest(aString); - - return result; - -} - -/** - * Consume characters until you encounter one contained in given - * input set. - * - * @update gess 3/25/98 - * @param aString will contain the result of this method - * @param aTerminalSet is an ordered string that contains - * the set of INVALID characters - * @return error code - */ -nsresult nsScanner::ReadUntil(nsAString& aString, - const nsReadEndCondition& aEndCondition, - PRBool addTerminal) -{ - if (!mSlidingBuffer) { - return kEOF; - } - - nsScannerIterator origin, current; - const PRUnichar* setstart = aEndCondition.mChars; - const PRUnichar* setcurrent; - - origin = mCurrentPosition; - current = origin; - - PRUnichar theChar=0; - nsresult result=Peek(theChar); - - if (result == kEOF) { - return Eof(); - } - - while (current != mEndPosition) { - // Filter out completely wrong characters - // Check if all bits are in the required area - if(!(theChar & aEndCondition.mFilter)) { - // They were. Do a thorough check. 
- - setcurrent = setstart; - while (*setcurrent) { - if (*setcurrent == theChar) { - goto found; - } - ++setcurrent; - } - } - - ++current; - theChar = *current; - } - - // If we are here, we didn't find any terminator in the string and - // current = mEndPosition - SetPosition(current); - AppendUnicodeTo(origin, current, aString); - return Eof(); - -found: - if(addTerminal) - ++current; - AppendUnicodeTo(origin, current, aString); - SetPosition(current); - - //DoErrTest(aString); - - return NS_OK; -} - -nsresult nsScanner::ReadUntil(nsScannerIterator& aStart, - nsScannerIterator& aEnd, - const nsReadEndCondition &aEndCondition, - PRBool addTerminal) -{ - if (!mSlidingBuffer) { - return kEOF; - } - - nsScannerIterator origin, current; - const PRUnichar* setstart = aEndCondition.mChars; - const PRUnichar* setcurrent; - - origin = mCurrentPosition; - current = origin; - - PRUnichar theChar=0; - nsresult result=Peek(theChar); - - if (result == kEOF) { - aStart = aEnd = current; - return Eof(); - } - - while (current != mEndPosition) { - // Filter out completely wrong characters - // Check if all bits are in the required area - if(!(theChar & aEndCondition.mFilter)) { - // They were. Do a thorough check. 
- setcurrent = setstart; - while (*setcurrent) { - if (*setcurrent == theChar) { - goto found; - } - ++setcurrent; - } - } - - ++current; - theChar = *current; - } - - // If we are here, we didn't find any terminator in the string and - // current = mEndPosition - SetPosition(current); - aStart = origin; - aEnd = current; - return Eof(); - - found: - if(addTerminal) - ++current; - aStart = origin; - aEnd = current; - SetPosition(current); - - return NS_OK; -} - -/** - * Consumes chars until you see the given terminalChar - * - * @update gess 3/25/98 - * @param - * @return error code - */ -nsresult nsScanner::ReadUntil(nsAString& aString, - PRUnichar aTerminalChar, - PRBool addTerminal) -{ - if (!mSlidingBuffer) { - return kEOF; - } - - nsScannerIterator origin, current; - - origin = mCurrentPosition; - current = origin; - - PRUnichar theChar; - Peek(theChar); - - while (current != mEndPosition) { - if (aTerminalChar == theChar) { - if(addTerminal) - ++current; - AppendUnicodeTo(origin, current, aString); - SetPosition(current); - return NS_OK; - } - ++current; - theChar = *current; - } - - // If we are here, we didn't find any terminator in the string and - // current = mEndPosition - AppendUnicodeTo(origin, current, aString); - SetPosition(current); - return Eof(); - -} - -void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd) -{ - aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd); -} - -void nsScanner::CurrentPosition(nsScannerIterator& aPosition) -{ - aPosition = mCurrentPosition; -} - -void nsScanner::EndReading(nsScannerIterator& aPosition) -{ - aPosition = mEndPosition; -} - -void nsScanner::SetPosition(nsScannerIterator& aPosition, PRBool aTerminate, PRBool aReverse) -{ - if (mSlidingBuffer) { - if (aReverse) { - mCountRemaining += (Distance(aPosition, mCurrentPosition)); - } - else { - mCountRemaining -= (Distance(mCurrentPosition, aPosition)); - } - mCurrentPosition = aPosition; - if 
(aTerminate && (mCurrentPosition == mEndPosition)) { - mMarkPosition = mCurrentPosition; - mSlidingBuffer->DiscardPrefix(mCurrentPosition); - } - } -} - -void nsScanner::ReplaceCharacter(nsScannerIterator& aPosition, - PRUnichar aChar) -{ - if (mSlidingBuffer) { - mSlidingBuffer->ReplaceCharacter(aPosition, aChar); - } -} - -void nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf) -{ - if (!mSlidingBuffer) { - mSlidingBuffer = new nsScannerString(aBuf); - mSlidingBuffer->BeginReading(mCurrentPosition); - mMarkPosition = mCurrentPosition; - mSlidingBuffer->EndReading(mEndPosition); - mCountRemaining = aBuf->DataLength(); - } - else { - mSlidingBuffer->AppendBuffer(aBuf); - if (mCurrentPosition == mEndPosition) { - mSlidingBuffer->BeginReading(mCurrentPosition); - } - mSlidingBuffer->EndReading(mEndPosition); - mCountRemaining += aBuf->DataLength(); - } -} - -void nsScanner::AppendASCIItoBuffer(const char* aData, PRUint32 aLen) -{ - nsScannerString::Buffer* buf = nsScannerString::AllocBuffer(aLen); - if (buf) - { - LossyConvertEncoding<char, PRUnichar> converter(buf->DataStart()); - converter.write(aData, aLen); - converter.write_terminator(); - AppendToBuffer(buf); - } -} - -/** - * call this to copy bytes out of the scanner that have not yet been consumed - * by the tokenization process. - * - * @update gess 5/12/98 - * @param aCopyBuffer is where the scanner buffer will be copied to - * @return nada - */ -void nsScanner::CopyUnusedData(nsString& aCopyBuffer) { - nsScannerIterator start, end; - start = mCurrentPosition; - end = mEndPosition; - - CopyUnicodeTo(start, end, aCopyBuffer); -} - -/** - * Retrieve the name of the file that the scanner is reading from. - * In some cases, it's just a given name, because the scanner isn't - * really reading from a file. - * - * @update gess 5/12/98 - * @return - */ -nsString& nsScanner::GetFilename(void) { - return mFilename; -} - -/** - * Conduct self test. 
Actually, selftesting for this class - * occurs in the parser selftest. - * - * @update gess 3/25/98 - * @param - * @return - */ - -void nsScanner::SelfTest(void) { -#ifdef _DEBUG -#endif -} - - - diff --git a/htmlparser/src/nsScanner.h b/htmlparser/src/nsScanner.h deleted file mode 100644 index d7fcde1cd12a..000000000000 --- a/htmlparser/src/nsScanner.h +++ /dev/null @@ -1,391 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - - -/** - * MODULE NOTES: - * @update gess 4/1/98 - * - * The scanner is a low-level service class that knows - * how to consume characters out of an (internal) stream. - * This class also offers a series of utility methods - * that most tokenizers want, such as readUntil(), - * readWhile() and SkipWhitespace(). - */ - - -#ifndef SCANNER -#define SCANNER - -#include "nsCOMPtr.h" -#include "nsString.h" -#include "nsIParser.h" -#include "prtypes.h" -#include "nsIUnicodeDecoder.h" -#include "nsScannerString.h" -#include "nsIInputStream.h" - -class nsReadEndCondition { -public: - const PRUnichar *mChars; - PRUnichar mFilter; - explicit nsReadEndCondition(const PRUnichar* aTerminateChars); -private: - nsReadEndCondition(const nsReadEndCondition& aOther); // No copying - void operator=(const nsReadEndCondition& aOther); // No assigning -}; - -class nsScanner { - public: - - /** - * Use this constructor if you want i/o to be based on - * a single string you hand in during construction. - * This short cut was added for Javascript. 
- * - * @update ftang 3/02/99 - * @param aCharset charset - * @param aCharsetSource - where the charset info came from - * @param aMode represents the parser mode (nav, other) - * @return - */ - nsScanner(const nsAString& anHTMLString, const nsACString& aCharset, PRInt32 aSource); - - /** - * Use this constructor if you want i/o to be based on - * a file (therefore a stream) or just data you provide via Append(). - * - * @update ftang 3/02/99 - * @param aCharset charset - * @param aCharsetSource - where the charset info came from - * @param aMode represents the parser mode (nav, other) - * @return - */ - nsScanner(nsString& aFilename,PRBool aCreateStream, const nsACString& aCharset, PRInt32 aSource); - - /** - * Use this constructor if you want i/o to be stream based. - * - * @update ftang 3/02/99 - * @param aCharset charset - * @param aCharsetSource - where the charset info came from - * @param aMode represents the parser mode (nav, other) - * @return - */ - nsScanner(const nsAString& aFilename, nsIInputStream* aStream, const nsACString& aCharset, PRInt32 aSource); - - - ~nsScanner(); - - /** - * retrieve next char from internal input stream - * - * @update gess 3/25/98 - * @param ch is the char to accept new value - * @return error code reflecting read status - */ - nsresult GetChar(PRUnichar& ch); - - /** - * peek ahead to consume next char from scanner's internal - * input buffer - * - * @update gess 3/25/98 - * @param ch is the char to accept new value - * @return error code reflecting read status - */ - nsresult Peek(PRUnichar& ch, PRUint32 aOffset=0); - - nsresult Peek(nsAString& aStr, PRInt32 aNumChars); - - /** - * Skip over chars as long as they're in aSkipSet - * - * @update gess 3/25/98 - * @param set of chars to be skipped - * @return error code - */ - nsresult SkipOver(nsString& SkipChars); - - /** - * Skip over chars as long as they equal given char - * - * @update gess 3/25/98 - * @param char to be skipped - * @return error code - */ - nsresult 
SkipOver(PRUnichar aSkipChar); - - /** - * Skip over chars until they're in aValidSet - * - * @update gess 3/25/98 - * @param aValid set contains chars you're looking for - * @return error code - */ - nsresult SkipTo(nsString& aValidSet); - - /** - * Skip over chars as long as they're in aSequence - * - * @update gess 3/25/98 - * @param contains sequence to be skipped - * @return error code - */ - nsresult SkipPast(nsString& aSequence); - - /** - * Skip whitespace on scanner input stream - * - * @update gess 3/25/98 - * @return error status - */ - nsresult SkipWhitespace(PRInt32& aNewlinesSkipped); - - /** - * Determine if the scanner has reached EOF. - * This method can also cause the buffer to be filled - * if it happens to be empty - * - * @update gess 3/25/98 - * @return PR_TRUE upon eof condition - */ - nsresult Eof(void); - - /** - * Consume characters until you run into space, a '<', a '>', or a '/'. - * - * @param aString - receives new data from stream - * @return error code - */ - nsresult ReadTagIdentifier(nsString& aString); - - /** - * Consume characters until you run into a char that's not valid in an - * entity name - * - * @param aString - receives new data from stream - * @return error code - */ - nsresult ReadEntityIdentifier(nsString& aString); - nsresult ReadNumber(nsString& aString,PRInt32 aBase); - nsresult ReadWhitespace(nsString& aString, - PRInt32& aNewlinesSkipped); - nsresult ReadWhitespace(nsScannerIterator& aStart, - nsScannerIterator& aEnd, - PRInt32& aNewlinesSkipped); - - /** - * Consume characters until you find the terminal char - * - * @update gess 3/25/98 - * @param aString receives new data from stream - * @param aTerminal contains terminating char - * @param addTerminal tells us whether to append terminal to aString - * @return error code - */ - nsresult ReadUntil(nsAString& aString, - PRUnichar aTerminal, - PRBool addTerminal); - - /** - * Consume characters until you find one contained in given - * terminal set. 
- * - * @update gess 3/25/98 - * @param aString receives new data from stream - * @param aTermSet contains set of terminating chars - * @param addTerminal tells us whether to append terminal to aString - * @return error code - */ - nsresult ReadUntil(nsAString& aString, - const nsReadEndCondition& aEndCondition, - PRBool addTerminal); - - nsresult ReadUntil(nsScannerIterator& aStart, - nsScannerIterator& aEnd, - const nsReadEndCondition& aEndCondition, - PRBool addTerminal); - - - /** - * Consume characters while they're members of anInputSet - * - * @update gess 3/25/98 - * @param aString receives new data from stream - * @param anInputSet contains valid chars - * @param addTerminal tells us whether to append terminal to aString - * @return error code - */ - nsresult ReadWhile(nsString& aString,nsString& anInputSet,PRBool addTerminal); - - /** - * Records current offset position in input stream. This allows us - * to back up to this point if the need should arise, such as when - * tokenization gets interrupted. - * - * @update gess 5/12/98 - * @param - * @return - */ - void Mark(void); - - /** - * Resets current offset position of input stream to marked position. - * This allows us to back up to this point if the need should arise, - * such as when tokenization gets interrupted. - * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST! - * - * @update gess 5/12/98 - * @param - * @return - */ - void RewindToMark(void); - - - /** - * - * - * @update harishd 01/12/99 - * @param - * @return - */ - PRBool UngetReadable(const nsAString& aBuffer); - - /** - * - * - * @update gess 5/13/98 - * @param - * @return - */ - nsresult Append(const nsAString& aBuffer); - - /** - * - * - * @update gess 5/21/98 - * @param - * @return - */ - nsresult Append(const char* aBuffer, PRUint32 aLen); - - /** - * Call this to copy bytes out of the scanner that have not yet been consumed - * by the tokenization process. 
- * - * @update gess 5/12/98 - * @param aCopyBuffer is where the scanner buffer will be copied to - * @return nada - */ - void CopyUnusedData(nsString& aCopyBuffer); - - /** - * Retrieve the name of the file that the scanner is reading from. - * In some cases, it's just a given name, because the scanner isn't - * really reading from a file. - * - * @update gess 5/12/98 - * @return - */ - nsString& GetFilename(void); - - static void SelfTest(); - - /** - * Use this setter to change the scanner's unicode decoder - * - * @update ftang 3/02/99 - * @param aCharset a normalized (alias resolved) charset name - * @param aCharsetSource- where the charset info came from - * @return - */ - nsresult SetDocumentCharset(const nsACString& aCharset, PRInt32 aSource); - - void BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd); - void CurrentPosition(nsScannerIterator& aPosition); - void EndReading(nsScannerIterator& aPosition); - void SetPosition(nsScannerIterator& aPosition, - PRBool aTruncate = PR_FALSE, - PRBool aReverse = PR_FALSE); - void ReplaceCharacter(nsScannerIterator& aPosition, - PRUnichar aChar); - - /** - * Internal method used to cause the internal buffer to - * be filled with data. - * - * @update gess4/3/98 - */ - PRBool IsIncremental(void) {return mIncremental;} - void SetIncremental(PRBool anIncrValue) {mIncremental=anIncrValue;} - - protected: - - - enum {eBufferSizeThreshold=0x1000}; //4K - - /** - * Internal method used to cause the internal buffer to - * be filled with data. 
- * - * @update gess4/3/98 - */ - nsresult FillBuffer(void); - - void AppendToBuffer(nsScannerString::Buffer*); - void AppendToBuffer(const nsAString& aStr) { AppendToBuffer(nsScannerString::AllocBufferFromString(aStr)); } - void AppendASCIItoBuffer(const char* aData, PRUint32 aLen); - - nsCOMPtr<nsIInputStream> mInputStream; - nsScannerString* mSlidingBuffer; - nsScannerIterator mCurrentPosition; // The position we will next read from in the scanner buffer - nsScannerIterator mMarkPosition; // The position last marked (we may rewind to here) - nsScannerIterator mEndPosition; // The current end of the scanner buffer - nsString mFilename; - PRUint32 mCountRemaining; // The number of bytes still to be read - // from the scanner buffer - PRUint32 mTotalRead; - PRPackedBool mIncremental; - PRInt32 mCharsetSource; - nsCString mCharset; - nsIUnicodeDecoder *mUnicodeDecoder; -}; - -#endif - - diff --git a/htmlparser/src/nsScannerString.cpp b/htmlparser/src/nsScannerString.cpp deleted file mode 100644 index 06dba15845ac..000000000000 --- a/htmlparser/src/nsScannerString.cpp +++ /dev/null @@ -1,578 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* vim:set ts=2 sw=2 sts=2 et cindent: */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla. - * - * The Initial Developer of the Original Code is IBM Corporation. 
- * Portions created by IBM Corporation are Copyright (C) 2003 - * IBM Corporation. All Rights Reserved. - * - * Contributor(s): - * Darin Fisher <darin@meer.net> - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include <stdlib.h> -#include "nsScannerString.h" - - - /** - * nsScannerBufferList - */ - -nsScannerBufferList::Buffer* -nsScannerBufferList::AllocBufferFromString( const nsAString& aString ) - { - PRUint32 len = aString.Length(); - - Buffer* buf = (Buffer*) malloc(sizeof(Buffer) + (len + 1) * sizeof(PRUnichar)); - if (buf) - { - // leave PRCList members of Buffer uninitialized - - buf->mUsageCount = 0; - buf->mDataEnd = buf->DataStart() + len; - - nsAString::const_iterator source; - aString.BeginReading(source); - nsCharTraits<PRUnichar>::copy(buf->DataStart(), source.get(), len); - - // XXX null terminate. 
this shouldn't be required, but we do it because - // nsScanner erroneously thinks it can dereference DataEnd :-( - *buf->mDataEnd = PRUnichar(0); - } - return buf; - } - -nsScannerBufferList::Buffer* -nsScannerBufferList::AllocBuffer( PRUint32 capacity ) - { - Buffer* buf = (Buffer*) malloc(sizeof(Buffer) + (capacity + 1) * sizeof(PRUnichar)); - if (buf) - { - // leave PRCList members of Buffer uninitialized - - buf->mUsageCount = 0; - buf->mDataEnd = buf->DataStart() + capacity; - - // XXX null terminate. this shouldn't be required, but we do it because - // nsScanner erroneously thinks it can dereference DataEnd :-( - *buf->mDataEnd = PRUnichar(0); - } - return buf; - } - -void -nsScannerBufferList::ReleaseAll() - { - while (!PR_CLIST_IS_EMPTY(&mBuffers)) - { - PRCList* node = PR_LIST_HEAD(&mBuffers); - PR_REMOVE_LINK(node); - //printf(">>> freeing buffer @%p\n", node); - free(NS_STATIC_CAST(Buffer*, node)); - } - } - -void -nsScannerBufferList::SplitBuffer( const Position& pos ) - { - // splitting to the right keeps the work string and any extant token - // pointing to and holding a reference count on the same buffer. 
- - Buffer* bufferToSplit = pos.mBuffer; - NS_ASSERTION(bufferToSplit, "null pointer"); - - PRUint32 splitOffset = pos.mPosition - bufferToSplit->DataStart(); - NS_ASSERTION(pos.mPosition >= bufferToSplit->DataStart() && - splitOffset <= bufferToSplit->DataLength(), - "split offset is outside buffer"); - - PRUint32 len = bufferToSplit->DataLength() - splitOffset; - Buffer* new_buffer = AllocBuffer(len); - if (new_buffer) - { - nsCharTraits<PRUnichar>::copy(new_buffer->DataStart(), - bufferToSplit->DataStart() + splitOffset, - len); - InsertAfter(new_buffer, bufferToSplit); - bufferToSplit->SetDataLength(splitOffset); - } - } - -void -nsScannerBufferList::DiscardUnreferencedPrefix( Buffer* aBuf ) - { - if (aBuf == Head()) - { - while (!PR_CLIST_IS_EMPTY(&mBuffers) && !Head()->IsInUse()) - { - Buffer* buffer = Head(); - PR_REMOVE_LINK(buffer); - free(buffer); - } - } - } - -size_t -nsScannerBufferList::Position::Distance( const Position& aStart, const Position& aEnd ) - { - size_t result = 0; - if (aStart.mBuffer == aEnd.mBuffer) - { - result = aEnd.mPosition - aStart.mPosition; - } - else - { - result = aStart.mBuffer->DataEnd() - aStart.mPosition; - for (Buffer* b = aStart.mBuffer->Next(); b != aEnd.mBuffer; b = b->Next()) - result += b->DataLength(); - result += aEnd.mPosition - aEnd.mBuffer->DataStart(); - } - return result; - } - - -/** - * nsScannerSubstring - */ - -nsScannerSubstring::nsScannerSubstring() - : mStart(nsnull, nsnull) - , mEnd(nsnull, nsnull) - , mBufferList(nsnull) - , mLength(0) - , mIsDirty(PR_TRUE) - { - } - -nsScannerSubstring::nsScannerSubstring( const nsAString& s ) - : mBufferList(nsnull) - , mIsDirty(PR_TRUE) - { - Rebind(s); - } - -nsScannerSubstring::~nsScannerSubstring() - { - release_ownership_of_buffer_list(); - } - -PRInt32 -nsScannerSubstring::CountChar( PRUnichar c ) const - { - /* - re-write this to use a counting sink - */ - - size_type result = 0; - size_type lengthToExamine = Length(); - - nsScannerIterator iter; - for ( 
BeginReading(iter); ; ) - { - PRInt32 lengthToExamineInThisFragment = iter.size_forward(); - const PRUnichar* fromBegin = iter.get(); - result += size_type(NS_COUNT(fromBegin, fromBegin+lengthToExamineInThisFragment, c)); - if ( !(lengthToExamine -= lengthToExamineInThisFragment) ) - return result; - iter.advance(lengthToExamineInThisFragment); - } - // never reached; quiets warnings - return 0; - } - -void -nsScannerSubstring::Rebind( const nsScannerSubstring& aString, - const nsScannerIterator& aStart, - const nsScannerIterator& aEnd ) - { - // allow for the case where &aString == this - - aString.acquire_ownership_of_buffer_list(); - release_ownership_of_buffer_list(); - - mStart = aStart; - mEnd = aEnd; - mBufferList = aString.mBufferList; - mLength = Distance(aStart, aEnd); - mIsDirty = PR_TRUE; - } - -void -nsScannerSubstring::Rebind( const nsAString& aString ) - { - release_ownership_of_buffer_list(); - - mBufferList = new nsScannerBufferList(AllocBufferFromString(aString)); - mIsDirty = PR_TRUE; - - init_range_from_buffer_list(); - acquire_ownership_of_buffer_list(); - } - -const nsString& -nsScannerSubstring::AsString() const - { - if (mIsDirty) - { - nsScannerSubstring* mutable_this = NS_CONST_CAST(nsScannerSubstring*, this); - - nsScannerIterator start, end; - CopyUnicodeTo(BeginReading(start), EndReading(end), mutable_this->mFlattenedRep); - mutable_this->mIsDirty = PR_FALSE; - } - return mFlattenedRep; - } - -nsScannerIterator& -nsScannerSubstring::BeginReading( nsScannerIterator& iter ) const - { - iter.mOwner = this; - - iter.mFragment.mBuffer = mStart.mBuffer; - iter.mFragment.mFragmentStart = mStart.mPosition; - if (mStart.mBuffer == mEnd.mBuffer) - iter.mFragment.mFragmentEnd = mEnd.mPosition; - else - iter.mFragment.mFragmentEnd = mStart.mBuffer->DataEnd(); - - iter.mPosition = mStart.mPosition; - iter.normalize_forward(); - return iter; - } - -nsScannerIterator& -nsScannerSubstring::EndReading( nsScannerIterator& iter ) const - { - iter.mOwner = 
this; - - iter.mFragment.mBuffer = mEnd.mBuffer; - iter.mFragment.mFragmentEnd = mEnd.mPosition; - if (mStart.mBuffer == mEnd.mBuffer) - iter.mFragment.mFragmentStart = mStart.mPosition; - else - iter.mFragment.mFragmentStart = mEnd.mBuffer->DataStart(); - - iter.mPosition = mEnd.mPosition; - // must not |normalize_backward| as that would likely invalidate tests like |while ( first != last )| - return iter; - } - -PRBool -nsScannerSubstring::GetNextFragment( nsScannerFragment& frag ) const - { - // check to see if we are at the end of the buffer list - if (frag.mBuffer == mEnd.mBuffer) - return PR_FALSE; - - frag.mBuffer = NS_STATIC_CAST(const Buffer*, PR_NEXT_LINK(frag.mBuffer)); - - if (frag.mBuffer == mStart.mBuffer) - frag.mFragmentStart = mStart.mPosition; - else - frag.mFragmentStart = frag.mBuffer->DataStart(); - - if (frag.mBuffer == mEnd.mBuffer) - frag.mFragmentEnd = mEnd.mPosition; - else - frag.mFragmentEnd = frag.mBuffer->DataEnd(); - - return PR_TRUE; - } - -PRBool -nsScannerSubstring::GetPrevFragment( nsScannerFragment& frag ) const - { - // check to see if we are at the beginning of the buffer list - if (frag.mBuffer == mStart.mBuffer) - return PR_FALSE; - - frag.mBuffer = NS_STATIC_CAST(const Buffer*, PR_PREV_LINK(frag.mBuffer)); - - if (frag.mBuffer == mStart.mBuffer) - frag.mFragmentStart = mStart.mPosition; - else - frag.mFragmentStart = frag.mBuffer->DataStart(); - - if (frag.mBuffer == mEnd.mBuffer) - frag.mFragmentEnd = mEnd.mPosition; - else - frag.mFragmentEnd = frag.mBuffer->DataEnd(); - - return PR_TRUE; - } - - - /** - * nsScannerString - */ - -nsScannerString::nsScannerString( Buffer* aBuf ) - { - mBufferList = new nsScannerBufferList(aBuf); - - init_range_from_buffer_list(); - acquire_ownership_of_buffer_list(); - } - -void -nsScannerString::AppendBuffer( Buffer* aBuf ) - { - mBufferList->Append(aBuf); - mLength += aBuf->DataLength(); - - mEnd.mBuffer = aBuf; - mEnd.mPosition = aBuf->DataEnd(); - - mIsDirty = PR_TRUE; - } - -void 
-nsScannerString::DiscardPrefix( const nsScannerIterator& aIter ) - { - Position old_start(mStart); - mStart = aIter; - mLength -= Position::Distance(old_start, mStart); - - mStart.mBuffer->IncrementUsageCount(); - old_start.mBuffer->DecrementUsageCount(); - - mBufferList->DiscardUnreferencedPrefix(old_start.mBuffer); - - mIsDirty = PR_TRUE; - } - -void -nsScannerString::UngetReadable( const nsAString& aReadable, const nsScannerIterator& aInsertPoint ) - /* - * Warning: this routine manipulates the shared buffer list in an unexpected way. - * The original design did not really allow for insertions, but this call promises - * that if called for a point after the end of all extant token strings, that no token string - * or the work string will be invalidated. - * - * This routine is protected because it is the responsibility of the derived class to keep those promises. - */ - { - Position insertPos(aInsertPoint); - - mBufferList->SplitBuffer(insertPos); - // splitting to the right keeps the work string and any extant token pointing to and - // holding a reference count on the same buffer - - Buffer* new_buffer = AllocBufferFromString(aReadable); - // make a new buffer with all the data to insert... - // BULLSHIT ALERT: we may have empty space to re-use in the split buffer, measure the cost - // of this and decide if we should do the work to fill it - - Buffer* buffer_to_split = insertPos.mBuffer; - mBufferList->InsertAfter(new_buffer, buffer_to_split); - mLength += aReadable.Length(); - - mEnd.mBuffer = mBufferList->Tail(); - mEnd.mPosition = mEnd.mBuffer->DataEnd(); - - mIsDirty = PR_TRUE; - } - -void -nsScannerString::ReplaceCharacter(nsScannerIterator& aPosition, PRUnichar aChar) - { - // XXX Casting a const to non-const. Unless the base class - // provides support for writing iterators, this is the best - // that can be done. 
- PRUnichar* pos = NS_CONST_CAST(PRUnichar*, aPosition.get()); - *pos = aChar; - - mIsDirty = PR_TRUE; - } - - - /** - * utils -- based on code from nsReadableUtils.cpp - */ - -void -CopyUnicodeTo( const nsScannerIterator& aSrcStart, - const nsScannerIterator& aSrcEnd, - nsAString& aDest ) - { - nsAString::iterator writer; - aDest.SetLength(Distance(aSrcStart, aSrcEnd)); - aDest.BeginWriting(writer); - nsScannerIterator fromBegin(aSrcStart); - - copy_string(fromBegin, aSrcEnd, writer); - } - -void -AppendUnicodeTo( const nsScannerIterator& aSrcStart, - const nsScannerIterator& aSrcEnd, - nsAString& aDest ) - { - nsAString::iterator writer; - PRUint32 oldLength = aDest.Length(); - aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd)); - aDest.BeginWriting(writer).advance(oldLength); - nsScannerIterator fromBegin(aSrcStart); - - copy_string(fromBegin, aSrcEnd, writer); - } - -PRBool -FindCharInReadable( PRUnichar aChar, - nsScannerIterator& aSearchStart, - const nsScannerIterator& aSearchEnd ) - { - while ( aSearchStart != aSearchEnd ) - { - PRInt32 fragmentLength; - if ( SameFragment(aSearchStart, aSearchEnd) ) - fragmentLength = aSearchEnd.get() - aSearchStart.get(); - else - fragmentLength = aSearchStart.size_forward(); - - const PRUnichar* charFoundAt = nsCharTraits<PRUnichar>::find(aSearchStart.get(), fragmentLength, aChar); - if ( charFoundAt ) { - aSearchStart.advance( charFoundAt - aSearchStart.get() ); - return PR_TRUE; - } - - aSearchStart.advance(fragmentLength); - } - - return PR_FALSE; - } - -PRBool -FindInReadable( const nsAString& aPattern, - nsScannerIterator& aSearchStart, - nsScannerIterator& aSearchEnd, - const nsStringComparator& compare ) - { - PRBool found_it = PR_FALSE; - - // only bother searching at all if we're given a non-empty range to search - if ( aSearchStart != aSearchEnd ) - { - nsAString::const_iterator aPatternStart, aPatternEnd; - aPattern.BeginReading(aPatternStart); - aPattern.EndReading(aPatternEnd); - - // outer loop keeps 
searching till we find it or run out of string to search - while ( !found_it ) - { - // fast inner loop (that's what it's called, not what it is) looks for a potential match - while ( aSearchStart != aSearchEnd && - compare(*aPatternStart, *aSearchStart) ) - ++aSearchStart; - - // if we broke out of the `fast' loop because we're out of string ... we're done: no match - if ( aSearchStart == aSearchEnd ) - break; - - // otherwise, we're at a potential match, let's see if we really hit one - nsAString::const_iterator testPattern(aPatternStart); - nsScannerIterator testSearch(aSearchStart); - - // slow inner loop verifies the potential match (found by the `fast' loop) at the current position - for(;;) - { - // we already compared the first character in the outer loop, - // so we'll advance before the next comparison - ++testPattern; - ++testSearch; - - // if we verified all the way to the end of the pattern, then we found it! - if ( testPattern == aPatternEnd ) - { - found_it = PR_TRUE; - aSearchEnd = testSearch; // return the exact found range through the parameters - break; - } - - // if we got to end of the string we're searching before we hit the end of the - // pattern, we'll never find what we're looking for - if ( testSearch == aSearchEnd ) - { - aSearchStart = aSearchEnd; - break; - } - - // else if we mismatched ... it's time to advance to the next search position - // and get back into the `fast' loop - if ( compare(*testPattern, *testSearch) ) - { - ++aSearchStart; - break; - } - } - } - } - - return found_it; - } - - /** - * This implementation is simple, but does too much work. - * It searches the entire string from left to right, and returns the last match found, if any. - * This implementation will be replaced when I get |reverse_iterator|s working. 
- */ -PRBool -RFindInReadable( const nsAString& aPattern, - nsScannerIterator& aSearchStart, - nsScannerIterator& aSearchEnd, - const nsStringComparator& aComparator ) - { - PRBool found_it = PR_FALSE; - - nsScannerIterator savedSearchEnd(aSearchEnd); - nsScannerIterator searchStart(aSearchStart), searchEnd(aSearchEnd); - - while ( searchStart != searchEnd ) - { - if ( FindInReadable(aPattern, searchStart, searchEnd, aComparator) ) - { - found_it = PR_TRUE; - - // this is the best match so far, so remember it - aSearchStart = searchStart; - aSearchEnd = searchEnd; - - // ...and get ready to search some more - // (it's tempting to set |searchStart=searchEnd| ... but that misses overlapping patterns) - ++searchStart; - searchEnd = savedSearchEnd; - } - } - - // if we never found it, return an empty range - if ( !found_it ) - aSearchStart = aSearchEnd; - - return found_it; - } diff --git a/htmlparser/src/nsToken.cpp b/htmlparser/src/nsToken.cpp deleted file mode 100644 index 830ac904763b..000000000000 --- a/htmlparser/src/nsToken.cpp +++ /dev/null @@ -1,176 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "nsToken.h" -#include "nsScanner.h" - - -#ifdef MATCH_CTOR_DTOR -MOZ_DECL_CTOR_COUNTER(CToken) -#endif - -static int TokenCount=0; -static int DelTokenCount=0; - -int CToken::GetTokenCount() { - return TokenCount-DelTokenCount; -} - - -/************************************************************** - And now for the CToken... - **************************************************************/ - -/** - * Default constructor - * - * @update gess 7/21/98 - */ -CToken::CToken(PRInt32 aTag) { - // Tokens are allocated through the arena ( not heap allocated..yay ). - // We, therefore, don't need this macro anymore.. -#ifdef MATCH_CTOR_DTOR - MOZ_COUNT_CTOR(CToken); -#endif - mAttrCount=0; - mNewlineCount=0; - mLineNumber = 0; - mTypeID=aTag; - // Note that the use count starts with 1 instead of 0. This - // is because of the assumption that any token created is in - // use and therefore does not require an explicit addref, or - // rather IF_HOLD. This, also, will make sure that tokens created - // on the stack do not accidently hit the arena recycler. 
- mUseCount=1; - -#ifdef NS_DEBUG - ++TokenCount; -#endif -} - -/** - * Decstructor - * - * @update gess 3/25/98 - */ -CToken::~CToken() { - // Tokens are allocated through the arena ( not heap allocated..yay ). - // We, therefore, don't need this macro anymore.. -#ifdef MATCH_CTOR_DTOR - MOZ_COUNT_DTOR(CToken); -#endif - ++DelTokenCount; - mUseCount=0; -} - - -/** - * Virtual method used to tell this toke to consume his - * valid chars. - * - * @update gess 3/25/98 - * @param aChar -- first char in sequence - * @param aScanner -- object to retrieve data from - * @return int error code - */ -nsresult CToken::Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode) { - nsresult result=NS_OK; - return result; -} - -/** - * Get string of full contents, suitable for debug dump. - * It should look exactly like the input source. - * @update gess5/11/98 - * @return reference to string containing string value - */ -void CToken::GetSource(nsString& anOutputString) { - anOutputString.Assign(GetStringValue()); -} - -/** - * @update harishd 3/23/00 - * @return reference to string containing string value - */ -void CToken::AppendSourceTo(nsAString& anOutputString) { - anOutputString.Append(GetStringValue()); -} - -/** - * Retrieves copy of internal ordinal value. - * This method is deprecated, and will soon be going away. - * - * @update gess 3/25/98 - * @return int containing ordinal value - */ -PRInt32 CToken::GetTypeID(void) { - return mTypeID; -} - -/** - * Retrieves copy of attr count for this token - * - * @update gess 3/25/98 - * @return int containing attribute count - */ -PRInt16 CToken::GetAttributeCount(void) { - return mAttrCount; -} - - -/** - * Retrieve type of token. This class returns -1, but - * subclasses return something more meaningful. - * - * @update gess 3/25/98 - * @return int value containing token type. 
- */ -PRInt32 CToken::GetTokenType(void) { - return -1; -} - - -/** - * - * @update gess 3/25/98 - */ -void CToken::SelfTest(void) { -#ifdef _DEBUG -#endif -} - - diff --git a/htmlparser/src/nsViewSourceHTML.cpp b/htmlparser/src/nsViewSourceHTML.cpp deleted file mode 100644 index fb160e0cc3a8..000000000000 --- a/htmlparser/src/nsViewSourceHTML.cpp +++ /dev/null @@ -1,1224 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * jce2@po.cwru.edu <Jason Eager>: Added pref to turn on/off - * Boris Zbarsky <bzbarsky@mit.edu> - * rbs@maths.uq.edu.au - * Andreas M. Schneider <clarence@clarence.de> - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -/** - * MODULE NOTES: - * @update gess 4/8/98 - * - * - */ - -/* - * Set NS_VIEWSOURCE_TOKENS_PER_BLOCK to 0 to disable multi-block - * output. Multi-block output helps reduce the amount of bidi - * processing we have to do on the resulting content model. - */ -#define NS_VIEWSOURCE_TOKENS_PER_BLOCK 16 - -#ifdef RAPTOR_PERF_METRICS -# define START_TIMER() \ - if(mParser) mParser->mParseTime.Start(PR_FALSE); \ - if(mParser) mParser->mDTDTime.Start(PR_FALSE); - -# define STOP_TIMER() \ - if(mParser) mParser->mParseTime.Stop(); \ - if(mParser) mParser->mDTDTime.Stop(); - -#else -# define STOP_TIMER() -# define START_TIMER() -#endif - -#include "nsIAtom.h" -#include "nsViewSourceHTML.h" -#include "nsCRT.h" -#include "nsParser.h" -#include "nsScanner.h" -#include "nsIParser.h" -#include "nsDTDUtils.h" -#include "nsIContentSink.h" -#include "nsIHTMLContentSink.h" -#include "nsHTMLTokenizer.h" -#include "nsIPrefService.h" -#include "nsIPrefBranch.h" -#include "nsUnicharUtils.h" -#include "nsPrintfCString.h" - -#include "nsIServiceManager.h" - -#include "COtherDTD.h" -#include "nsElementTable.h" - -#include "prenv.h" //this is here for debug reasons... -#include "prtypes.h" //this is here for debug reasons... 
-#include "prio.h" -#include "plstr.h" -#include "prmem.h" - -#ifdef RAPTOR_PERF_METRICS -#include "stopwatch.h" -Stopwatch vsTimer; -#endif - - -static NS_DEFINE_IID(kClassIID, NS_VIEWSOURCE_HTML_IID); - -// Define this to dump the viewsource stuff to a file -//#define DUMP_TO_FILE -#ifdef DUMP_TO_FILE -#include <stdio.h> - FILE* gDumpFile=0; - static const char* gDumpFileName = "/tmp/viewsource.html"; -// static const char* gDumpFileName = "\\temp\\viewsource.html"; -#endif // DUMP_TO_FILE - -// bug 22022 - these are used to toggle 'Wrap Long Lines' on the viewsource -// window by selectively setting/unsetting the following class defined in -// viewsource.css; the setting is remembered between invocations using a pref. -static const char kBodyId[] = "viewsource"; -static const char kBodyClassWrap[] = "wrap"; - -/** - * This method gets called as part of our COM-like interfaces. - * Its purpose is to create an interface to parser object - * of some type. - * - * @update gess 4/8/98 - * @param nsIID id of object to discover - * @param aInstancePtr ptr to newly discovered interface - * @return NS_xxx result code - */ -nsresult CViewSourceHTML::QueryInterface(const nsIID& aIID, void** aInstancePtr) -{ - if (NULL == aInstancePtr) { - return NS_ERROR_NULL_POINTER; - } - - if(aIID.Equals(NS_GET_IID(nsISupports))) { //do IUnknown... - *aInstancePtr = (nsIDTD*)(this); - } - else if(aIID.Equals(NS_GET_IID(nsIDTD))) { //do IParser base class... - *aInstancePtr = (nsIDTD*)(this); - } - else if(aIID.Equals(kClassIID)) { //do this class... - *aInstancePtr = (CViewSourceHTML*)(this); - } - else { - *aInstancePtr=0; - return NS_NOINTERFACE; - } - NS_ADDREF_THIS(); - return NS_OK; -} - -/** - * This method is defined in nsIParser. It is used to - * cause the COM-like construction of an nsParser. 
- * - * @update gess 4/8/98 - * @param nsIParser** ptr to newly instantiated parser - * @return NS_xxx error result - */ -nsresult NS_NewViewSourceHTML(nsIDTD** aInstancePtrResult) -{ - CViewSourceHTML* it = new CViewSourceHTML(); - - if (it == 0) { - return NS_ERROR_OUT_OF_MEMORY; - } - - return it->QueryInterface(kClassIID, (void **) aInstancePtrResult); -} - - -NS_IMPL_ADDREF(CViewSourceHTML) -NS_IMPL_RELEASE(CViewSourceHTML) - -/******************************************** - ********************************************/ - -class CIndirectTextToken : public CTextToken { -public: - CIndirectTextToken() : CTextToken() { - mIndirectString=0; - } - - void SetIndirectString(const nsAString& aString) { - mIndirectString=&aString; - } - - virtual const nsAString& GetStringValue(void){ - return (const nsAString&)*mIndirectString; - } - - const nsAString* mIndirectString; -}; - - -/******************************************************************* - Now define the CSharedVSCOntext class... - *******************************************************************/ - -class CSharedVSContext { -public: - - CSharedVSContext() : - mErrorToken(NS_LITERAL_STRING("error")) { - } - - ~CSharedVSContext() { - } - - static CSharedVSContext& GetSharedContext() { - static CSharedVSContext gSharedVSContext; - return gSharedVSContext; - } - - nsCParserNode mEndNode; - nsCParserStartNode mStartNode; - nsCParserStartNode mTokenNode; - CIndirectTextToken mITextToken; - CTextToken mErrorToken; -}; - -enum { - VIEW_SOURCE_START_TAG = 0, - VIEW_SOURCE_END_TAG = 1, - VIEW_SOURCE_COMMENT = 2, - VIEW_SOURCE_CDATA = 3, - VIEW_SOURCE_DOCTYPE = 4, - VIEW_SOURCE_PI = 5, - VIEW_SOURCE_ENTITY = 6, - VIEW_SOURCE_TEXT = 7, - VIEW_SOURCE_ATTRIBUTE_NAME = 8, - VIEW_SOURCE_ATTRIBUTE_VALUE = 9, - VIEW_SOURCE_SUMMARY = 10, - VIEW_SOURCE_POPUP = 11, - VIEW_SOURCE_MARKUPDECLARATION = 12 -}; - -static const char* const kElementClasses[] = { - "start-tag", - "end-tag", - "comment", - "cdata", - "doctype", - "pi", - 
"entity", - "text", - "attribute-name", - "attribute-value", - "summary", - "popup", - "markupdeclaration" -}; - -static const char* const kBeforeText[] = { - "<", - "</", - "", - "", - "", - "", - "&", - "", - "", - "=", - "", - "", - "" -}; - -static const char* const kAfterText[] = { - ">", - ">", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "" -}; - -#ifdef DUMP_TO_FILE -static const char* const kDumpFileBeforeText[] = { - "<", - "</", - "", - "", - "", - "", - "&", - "", - "", - "=", - "", - "", - "" -}; - -static const char* const kDumpFileAfterText[] = { - ">", - ">", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "" -}; -#endif // DUMP_TO_FILE - -/** - * Default constructor - * - * @update gess 4/9/98 - * @param - * @return - */ -CViewSourceHTML::CViewSourceHTML() : mFilename(), mTags(), mErrors() { - mStartTag = VIEW_SOURCE_START_TAG; - mEndTag = VIEW_SOURCE_END_TAG; - mCommentTag = VIEW_SOURCE_COMMENT; - mCDATATag = VIEW_SOURCE_CDATA; - mMarkupDeclaration = VIEW_SOURCE_MARKUPDECLARATION; - mDocTypeTag = VIEW_SOURCE_DOCTYPE; - mPITag = VIEW_SOURCE_PI; - mEntityTag = VIEW_SOURCE_ENTITY; - mText = VIEW_SOURCE_TEXT; - mKey = VIEW_SOURCE_ATTRIBUTE_NAME; - mValue = VIEW_SOURCE_ATTRIBUTE_VALUE; - mSummaryTag = VIEW_SOURCE_SUMMARY; - mPopupTag = VIEW_SOURCE_POPUP; - mSyntaxHighlight = PR_FALSE; - mWrapLongLines = PR_FALSE; - nsCOMPtr<nsIPrefBranch> prefBranch(do_GetService(NS_PREFSERVICE_CONTRACTID)); - if (prefBranch) { - PRBool temp; - nsresult rv; - rv = prefBranch->GetBoolPref("view_source.syntax_highlight", &temp); - mSyntaxHighlight = NS_SUCCEEDED(rv) ? temp : PR_TRUE; - - rv = prefBranch->GetBoolPref("view_source.wrap_long_lines", &temp); - mWrapLongLines = NS_SUCCEEDED(rv) ? temp : PR_FALSE; - } - - mParser = 0; - mSink = 0; - mLineNumber = 1; - mTokenizer = 0; - mDocType=eHTML3_Quirks; // why? 
- mHasOpenRoot=PR_FALSE; - mHasOpenBody=PR_FALSE; - - mTokenCount=0; - -#ifdef DUMP_TO_FILE - gDumpFile = fopen(gDumpFileName,"w"); -#endif // DUMP_TO_FILE - -} - - - -/** - * Default destructor - * - * @update gess 4/9/98 - * @param - * @return - */ -CViewSourceHTML::~CViewSourceHTML(){ - mParser=0; //just to prove we destructed... -} - -/** - * - * @update gess1/8/99 - * @param - * @return - */ -const nsIID& CViewSourceHTML::GetMostDerivedIID(void) const{ - return kClassIID; -} - -/** - * Call this method if you want the DTD to construct a fresh - * instance of itself. - * @update gess7/23/98 - * @param - * @return - */ -nsresult CViewSourceHTML::CreateNewInstance(nsIDTD** aInstancePtrResult){ - return NS_NewViewSourceHTML(aInstancePtrResult); -} - -/** - * This method is called to determine if the given DTD can parse - * a document in a given source-type. - * NOTE: Parsing always assumes that the end result will involve - * storing the result in the main content model. - * @update gess6/24/98 - * @param - * @return TRUE if this DTD can satisfy the request; FALSE otherwise. 
- */ -NS_IMETHODIMP_(eAutoDetectResult) -CViewSourceHTML::CanParse(CParserContext& aParserContext, - const nsString& aBuffer, PRInt32 aVersion) -{ - eAutoDetectResult result=eUnknownDetect; - - if(eViewSource==aParserContext.mParserCommand) { - if(aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kPlainTextContentType)) || - aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kTextCSSContentType)) || - aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kTextJSContentType)) || - aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kApplicationJSContentType))) { - result=eValidDetect; - } - if(aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kXMLTextContentType)) || - aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kXMLApplicationContentType)) || - aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kXHTMLApplicationContentType)) || - aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kRDFTextContentType)) || - aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kHTMLTextContentType)) || - aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kXULTextContentType)) || -#ifdef MOZ_SVG - aParserContext.mMimeType.Equals(NS_LITERAL_CSTRING(kSVGTextContentType)) || -#endif - aParserContext.mMimeType.Equals(kSGMLTextContentType)) { - result=ePrimaryDetect; - } - } - return result; -} - - -/** - * The parser uses a code sandwich to wrap the parsing process. Before - * the process begins, WillBuildModel() is called. Afterwards the parser - * calls DidBuildModel(). 
- * @update rickg 03.20.2000 - * @param aParserContext - * @param aSink - * @return error code (almost always 0) - */ -nsresult CViewSourceHTML::WillBuildModel(const CParserContext& aParserContext, - nsITokenizer* aTokenizer, - nsIContentSink* aSink){ - - nsresult result=NS_OK; - -#ifdef RAPTOR_PERF_METRICS - vsTimer.Reset(); - NS_START_STOPWATCH(vsTimer); -#endif - - STOP_TIMER(); - mSink=(nsIHTMLContentSink*)aSink; - - if((!aParserContext.mPrevContext) && (mSink)) { - - nsAString & contextFilename = aParserContext.mScanner->GetFilename(); - mFilename = Substring(contextFilename, - 12, // The length of "view-source:" - contextFilename.Length() - 12); - - mTags.Truncate(); - mErrors.Assign(NS_LITERAL_STRING(" HTML 4.0 Strict-DTD validation (enabled); [Should use Transitional?].\n")); - - mDocType=aParserContext.mDocType; - mMimeType=aParserContext.mMimeType; - mDTDMode=aParserContext.mDTDMode; - mParserCommand=aParserContext.mParserCommand; - mTokenizer = aTokenizer; - mErrorCount=0; - mTagCount=0; - -#ifdef DUMP_TO_FILE - if (gDumpFile) { - - fprintf(gDumpFile, "<html>\n"); - fprintf(gDumpFile, "<head>\n"); - fprintf(gDumpFile, "<title>"); - fprintf(gDumpFile, "Source of: "); - fputs(NS_ConvertUCS2toUTF8(mFilename).get(), gDumpFile); - fprintf(gDumpFile, "\n"); - fprintf(gDumpFile, "\n"); - fprintf(gDumpFile, "\n"); - fprintf(gDumpFile, "\n"); - fprintf(gDumpFile, "\n"); - fprintf(gDumpFile, "
\n");
-    }
-#endif //DUMP_TO_FILE
-  }
-
-
-  if(eViewSource!=aParserContext.mParserCommand)
-    mDocType=ePlainText;
-  else mDocType=aParserContext.mDocType;
-
-  mLineNumber = 1;
-  result = mSink->WillBuildModel(); 
-
-  START_TIMER();
-  return result;
-}
-
-/**
-  * The parser uses a code sandwich to wrap the parsing process. Before
-  * the process begins, WillBuildModel() is called. Afterwards the parser
-  * calls DidBuildModel(). 
-  * @update	gess5/18/98
-  * @param	aFilename is the name of the file being parsed.
-  * @return	error code (almost always 0)
-  */
-NS_IMETHODIMP CViewSourceHTML::BuildModel(nsIParser* aParser,nsITokenizer* aTokenizer,nsITokenObserver* anObserver,nsIContentSink* aSink) {
-  nsresult result=NS_OK;
-
-  if(aTokenizer && aParser) {
-
-    nsITokenizer*  oldTokenizer=mTokenizer;
-    mTokenizer=aTokenizer;
-    nsTokenAllocator* theAllocator=mTokenizer->GetTokenAllocator();
-
-    if(!mHasOpenRoot) {
-      // For the stack-allocated tokens below, it's safe to pass a null
-      // token allocator, because there are no attributes on the tokens.
-      PRBool didBlock = PR_FALSE;
-
-      CStartToken htmlToken(NS_LITERAL_STRING("HTML"), eHTMLTag_html);
-      nsCParserNode htmlNode(&htmlToken, 0/*stack token*/);
-      mSink->OpenHTML(htmlNode);
-
-      CStartToken headToken(NS_LITERAL_STRING("HEAD"), eHTMLTag_head);
-      nsCParserNode headNode(&headToken, 0/*stack token*/);
-      mSink->OpenHead(headNode);
-
-      // Note that XUL will automatically add the prefix "Source of: "
-      if (StringBeginsWith(mFilename, NS_LITERAL_STRING("data:")) &&
-          mFilename.Length() > 50) {
-        nsAutoString dataFilename(Substring(mFilename, 0, 50));
-        dataFilename.Append(NS_LITERAL_STRING("..."));
-        mSink->SetTitle(dataFilename);
-      } else {
-        mSink->SetTitle(mFilename);
-      }
-
-      if (theAllocator) {
-        CStartToken* theToken=
-          NS_STATIC_CAST(CStartToken*,
-                         theAllocator->CreateTokenOfType(eToken_start,
-                                                         eHTMLTag_link,
-                                                         NS_LITERAL_STRING("LINK")));
-        if (theToken) {
-          nsCParserStartNode theNode(theToken, theAllocator);
-
-          AddAttrToNode(theNode, theAllocator,
-                        NS_LITERAL_STRING("rel"),
-                        NS_LITERAL_STRING("stylesheet"));
-
-          AddAttrToNode(theNode, theAllocator,
-                        NS_LITERAL_STRING("type"),
-                        NS_LITERAL_STRING("text/css"));
-
-          AddAttrToNode(theNode, theAllocator,
-                        NS_LITERAL_STRING("href"),
-                        NS_LITERAL_STRING("resource://gre/res/viewsource.css"));
-          
-          result = mSink->AddLeaf(theNode);
-          didBlock = result == NS_ERROR_HTMLPARSER_BLOCK;
-        }
-      }
-
-      CEndToken endHeadToken(eHTMLTag_head);
-      nsCParserNode endHeadNode(&endHeadToken, 0/*stack token*/);
-      result = mSink->CloseHead();
-      if(NS_SUCCEEDED(result)) {
-        mHasOpenRoot = PR_TRUE;
-        if (didBlock) {
-          result = NS_ERROR_HTMLPARSER_BLOCK;
-        }
-      }
-    }
-    if (NS_SUCCEEDED(result) && !mHasOpenBody) {
-      if (theAllocator) {
-        CStartToken* bodyToken=
-          NS_STATIC_CAST(CStartToken*,
-                         theAllocator->CreateTokenOfType(eToken_start,
-                                                         eHTMLTag_body,
-                                                         NS_LITERAL_STRING("BODY")));
-        if (bodyToken) {
-          nsCParserStartNode bodyNode(bodyToken, theAllocator);
-
-          AddAttrToNode(bodyNode, theAllocator,
-                        NS_LITERAL_STRING("id"),
-                        NS_ConvertASCIItoUCS2(kBodyId));
-          
-          if (mWrapLongLines) {
-            AddAttrToNode(bodyNode, theAllocator,
-                          NS_LITERAL_STRING("class"),
-                          NS_ConvertASCIItoUCS2(kBodyClassWrap));
-          }
-          result = mSink->OpenBody(bodyNode);
-          if(NS_SUCCEEDED(result)) mHasOpenBody=PR_TRUE;
-        }
-        
-        if (NS_SUCCEEDED(result)) {
-          CStartToken* preToken =
-            NS_STATIC_CAST(CStartToken*,
-                           theAllocator->CreateTokenOfType(eToken_start,
-                                                           eHTMLTag_pre,
-                                                           NS_LITERAL_STRING("PRE")));
-          if (preToken) {
-            nsCParserStartNode preNode(preToken, theAllocator);
-            AddAttrToNode(preNode, theAllocator,
-                          NS_LITERAL_STRING("id"),
-                          NS_LITERAL_STRING("line1"));
-            result = mSink->OpenContainer(preNode);
-          } else {
-            result = NS_ERROR_OUT_OF_MEMORY;
-          }
-        }
-      }
-    }
-
-    mSink->WillProcessTokens();
-
-    while(NS_SUCCEEDED(result)){
-      CToken* theToken=mTokenizer->PopToken();
-      if(theToken) {
-        result=HandleToken(theToken,aParser);
-        if(NS_SUCCEEDED(result)) {
-          IF_FREE(theToken, mTokenizer->GetTokenAllocator());
-          if (mParser->CanInterrupt() &&
-              mSink->DidProcessAToken() == NS_ERROR_HTMLPARSER_INTERRUPTED) {
-            result = NS_ERROR_HTMLPARSER_INTERRUPTED;
-            break;
-          }
-        }
-        else if(NS_ERROR_HTMLPARSER_BLOCK!=result){
-          mTokenizer->PushTokenFront(theToken);
-        }
-      }
-      else break;
-    }//while
-   
-    mTokenizer=oldTokenizer;
-  }
-  else result=NS_ERROR_HTMLPARSER_BADTOKENIZER;
-  return result;
-}
-
-
-/**
- * Call this to display an error summary regarding the page.
- * 
- * @update	rickg 6June2000
- * @return  nsresult
- */
-nsresult  CViewSourceHTML::GenerateSummary() {
-  nsresult result=NS_OK;
-
-  if(mErrorCount && mTagCount) {
-
-    mErrors.Append(NS_LITERAL_STRING("\n\n "));
-    mErrors.AppendInt(mErrorCount);
-    mErrors.Append(NS_LITERAL_STRING(" error(s) detected -- see highlighted portions.\n"));
-
-    result=WriteTag(mSummaryTag,mErrors,0,PR_FALSE);
-  }
-
-  return result;
-}
-
-/**
- * Call this to start a new PRE block.  See bug 86355 for why this
- * makes some pages much faster.
- */
-void CViewSourceHTML::StartNewPreBlock(void){
-  CEndToken endToken(eHTMLTag_pre);
-  nsCParserNode endNode(&endToken, 0/*stack token*/);
-  mSink->CloseContainer(eHTMLTag_pre);
-
-  nsTokenAllocator* theAllocator = mTokenizer->GetTokenAllocator();
-  if (!theAllocator) {
-    return;
-  }
-  
-  CStartToken* theToken =
-    NS_STATIC_CAST(CStartToken*,
-                   theAllocator->CreateTokenOfType(eToken_start,
-                                                   eHTMLTag_pre,
-                                                   NS_LITERAL_STRING("PRE")));
-  if (!theToken) {
-    return;
-  }
-
-  nsCParserStartNode startNode(theToken, theAllocator);
-  AddAttrToNode(startNode, theAllocator,
-                NS_LITERAL_STRING("id"),
-                NS_ConvertASCIItoUCS2(nsPrintfCString("line%d", mLineNumber)));
-  mSink->OpenContainer(startNode);
-  
-#ifdef DUMP_TO_FILE
-  if (gDumpFile) {
-    fprintf(gDumpFile, "
\n"); - fprintf(gDumpFile, "
\n", mLineNumber);
-  }
-#endif // DUMP_TO_FILE
-
-  mTokenCount = 0;
-}
-
-void CViewSourceHTML::AddAttrToNode(nsCParserStartNode& aNode,
-                                    nsTokenAllocator* aAllocator,
-                                    const nsAString& aAttrName,
-                                    const nsAString& aAttrValue)
-{
-  NS_PRECONDITION(aAllocator, "Must have a token allocator!");
-  
-  CAttributeToken* theAttr =
-    (CAttributeToken*) aAllocator->CreateTokenOfType(eToken_attribute,
-                                                     eHTMLTag_unknown,
-                                                     aAttrValue);
-  if (!theAttr) {
-    NS_ERROR("Failed to allocate attribute token");
-    return;
-  }
-
-  theAttr->SetKey(aAttrName);
-  aNode.AddAttribute(theAttr);
-}
-
-/**
- * 
- * @update	gess5/18/98
- * @param 
- * @return
- */
-NS_IMETHODIMP CViewSourceHTML::DidBuildModel(nsresult anErrorCode,PRBool aNotifySink,nsIParser* aParser,nsIContentSink* aSink){
-  nsresult result= NS_OK;
-
-  //ADD CODE HERE TO CLOSE OPEN CONTAINERS...
-
-  if(aParser){
-
-    mParser=(nsParser*)aParser;  //debug XXX
-    STOP_TIMER();
-
-    mSink=(nsIHTMLContentSink*)aParser->GetContentSink();
-    if((aNotifySink) && (mSink)) {
-        //now let's close automatically auto-opened containers...
-
-#ifdef DUMP_TO_FILE
-      if(gDumpFile) {
-        fprintf(gDumpFile, "
\n"); - fprintf(gDumpFile, "\n"); - fprintf(gDumpFile, "\n"); - fclose(gDumpFile); - } -#endif // DUMP_TO_FILE - - if(ePlainText!=mDocType) { - CEndToken theToken(eHTMLTag_pre); - nsCParserNode preNode(&theToken, 0/*stack token*/); - mSink->CloseContainer(eHTMLTag_pre); - - CEndToken bodyToken(eHTMLTag_body); - nsCParserNode bodyNode(&bodyToken, 0/*stack token*/); - mSink->CloseBody(); - - CEndToken htmlToken(eHTMLTag_html); - nsCParserNode htmlNode(&htmlToken, 0/*stack token*/); - mSink->CloseHTML(); - } - result = mSink->DidBuildModel(); - } - - START_TIMER(); - - } - -#ifdef RAPTOR_PERF_METRICS - NS_STOP_STOPWATCH(vsTimer); - printf("viewsource timer: "); - vsTimer.Print(); - printf("\n"); -#endif - - return result; -} - -/** - * Use this id you want to stop the building content model - * --------------[ Sets DTD to STOP mode ]---------------- - * It's recommended to use this method in accordance with - * the parser's terminate() method. - * - * @update harishd 07/22/99 - * @param - * @return - */ -NS_IMETHODIMP_(void) -CViewSourceHTML::Terminate() { -} - -NS_IMETHODIMP_(PRInt32) -CViewSourceHTML::GetType() { - return NS_IPARSER_FLAG_HTML; -} - -NS_IMETHODIMP -CViewSourceHTML::CollectSkippedContent(PRInt32 aTag, nsAString& aContent, PRInt32 &aLineNo) -{ - return NS_OK; -} - -/** - * - * @update gess5/18/98 - * @param - * @return - */ -NS_IMETHODIMP CViewSourceHTML::WillResumeParse(nsIContentSink* aSink){ - nsresult result = NS_OK; - if(mSink) { - result = mSink->WillResume(); - } - return result; -} - -/** - * - * @update gess5/18/98 - * @param - * @return - */ -NS_IMETHODIMP CViewSourceHTML::WillInterruptParse(nsIContentSink* aSink){ - nsresult result = NS_OK; - if(mSink) { - result = mSink->WillInterrupt(); - } - return result; -} - -/** - * Called by the parser to enable/disable dtd verification of the - * internal context stack. 
- * @update gess 7/23/98 - * @param - * @return - */ -void CViewSourceHTML::SetVerification(PRBool aEnabled) -{ -} - -/** - * This method is called to determine whether or not a tag - * of one type can contain a tag of another type. - * - * @update gess 3/25/98 - * @param aParent -- int tag of parent container - * @param aChild -- int tag of child container - * @return PR_TRUE if parent can contain child - */ -PRBool CViewSourceHTML::CanContain(PRInt32 aParent,PRInt32 aChild) const{ - PRBool result=PR_TRUE; - return result; -} - -/** - * Give rest of world access to our tag enums, so that CanContain(), etc, - * become useful. - */ -NS_IMETHODIMP -CViewSourceHTML::StringTagToIntTag(const nsAString &aTag, - PRInt32* aIntTag) const -{ - *aIntTag = nsHTMLTags::LookupTag(aTag); - - return NS_OK; -} - -NS_IMETHODIMP_(const PRUnichar *) -CViewSourceHTML::IntTagToStringTag(PRInt32 aIntTag) const -{ - const PRUnichar *str_ptr = nsHTMLTags::GetStringValue((nsHTMLTag)aIntTag); - - NS_ASSERTION(str_ptr, "Bad tag enum passed to COtherDTD::IntTagToStringTag()" - "!!"); - - return str_ptr; -} - -NS_IMETHODIMP_(nsIAtom *) -CViewSourceHTML::IntTagToAtom(PRInt32 aIntTag) const -{ - nsIAtom *atom = nsHTMLTags::GetAtom((nsHTMLTag)aIntTag); - - NS_ASSERTION(atom, "Bad tag enum passed to COtherDTD::IntTagToAtom()" - "!!"); - - return atom; -} - -PRBool CViewSourceHTML::IsBlockElement(PRInt32 aTagID,PRInt32 aParentID) const { - PRBool result=PR_FALSE; - return result; -} - -PRBool CViewSourceHTML::IsInlineElement(PRInt32 aTagID,PRInt32 aParentID) const { - PRBool result=PR_FALSE; - return result; -} - -/** - * This method gets called to determine whether a given - * tag is itself a container - * - * @update gess 3/25/98 - * @param aTag -- tag to test for containership - * @return PR_TRUE if given tag can contain other tags - */ -PRBool CViewSourceHTML::IsContainer(PRInt32 aTag) const{ - PRBool result=PR_TRUE; - return result; -} - -/** - * This method gets called when a tag needs to 
write it's attributes - * - * @update gess 3/25/98 - * @param - * @return result status - */ -nsresult CViewSourceHTML::WriteAttributes(PRInt32 attrCount) { - nsresult result=NS_OK; - - if(attrCount){ //go collect the attributes... - - CSharedVSContext& theContext=CSharedVSContext::GetSharedContext(); - - int attr = 0; - for(attr = 0; attr < attrCount; ++attr){ - CToken* theToken = mTokenizer->PeekToken(); - if(theToken) { - eHTMLTokenTypes theType = eHTMLTokenTypes(theToken->GetTokenType()); - if(eToken_attribute == theType){ - mTokenizer->PopToken(); //pop it for real... - theContext.mTokenNode.AddAttribute(theToken); //and add it to the node. - - CAttributeToken* theAttrToken = (CAttributeToken*)theToken; - const nsAString& theKey = theAttrToken->GetKey(); - - result = WriteTag(mKey,theKey,0,PR_FALSE); - const nsAString& theValue = theAttrToken->GetValue(); - - if(!theValue.IsEmpty() || theAttrToken->mHasEqualWithoutValue){ - result = WriteTag(mValue,theValue,0,PR_FALSE); - } - } - } - else return kEOF; - } - } - - return result; -} - -/** - * This method gets called when a tag needs to be sent out - * - * @update gess 3/25/98 - * @param - * @return result status - */ -nsresult CViewSourceHTML::WriteTag(PRInt32 aTagType,const nsAString & aText,PRInt32 attrCount,PRBool aNewlineRequired) { - static nsString theString; - - nsresult result=NS_OK; - - // adjust line number to what it will be after we finish writing this tag - // XXXbz life here sucks. We can't use the GetNewlineCount on the token, - // because Text tokens in - - - -

testcase 4

- -[span class z]
-

[span class a]

- -[span class z]
-

[span class a]

- -[span class z]
-

[span class a]

- -[span class z] -

[span class a]

- -
- diff --git a/htmlparser/tests/html/24462.html b/htmlparser/tests/html/24462.html deleted file mode 100644 index b73ed779f4dc..000000000000 --- a/htmlparser/tests/html/24462.html +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - -
- -
- - - -
- UNSER ANGEBOT -
- Foo -
- - diff --git a/htmlparser/tests/html/26347.html b/htmlparser/tests/html/26347.html deleted file mode 100644 index 40f740c6ab60..000000000000 --- a/htmlparser/tests/html/26347.html +++ /dev/null @@ -1,4 +0,0 @@ -Testcase for bug 68674 -www.mozilla.org -STRONG - This text should not be part of the link
- diff --git a/htmlparser/tests/html/26853.html b/htmlparser/tests/html/26853.html deleted file mode 100644 index d97a49f00a82..000000000000 --- a/htmlparser/tests/html/26853.html +++ /dev/null @@ -1,2 +0,0 @@ -foobar -
  • one
  • two diff --git a/htmlparser/tests/html/27490.html b/htmlparser/tests/html/27490.html deleted file mode 100644 index 9a589e50e8f2..000000000000 --- a/htmlparser/tests/html/27490.html +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - - - -
    - - Test -
    - Test -
    - - diff --git a/htmlparser/tests/html/30487.html b/htmlparser/tests/html/30487.html deleted file mode 100644 index acffe93fc67b..000000000000 --- a/htmlparser/tests/html/30487.html +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - -This text, in the body, is in "Font Face A" - - - - - -
    - - This text, in a table, is in "Font Face B". Immediately following -this text is a close font tag, and another erroneously placed close font -tag. - -
    This text, back in the body, should be in Font Face A, -but is not because Mozilla incorrectly heeded the second close-font tag in -the above table.
    - -
    \ No newline at end of file diff --git a/htmlparser/tests/html/3248.html b/htmlparser/tests/html/3248.html deleted file mode 100644 index 4373e317ad36..000000000000 --- a/htmlparser/tests/html/3248.html +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - - - - - -
      -
    1. Should be green - Linked with LINK HTTP Header.
    2. -
    3. Should be green - Linked with LINK
    4. -
    - - diff --git a/htmlparser/tests/html/32782.html b/htmlparser/tests/html/32782.html deleted file mode 100644 index 4a0f94b4aa13..000000000000 --- a/htmlparser/tests/html/32782.html +++ /dev/null @@ -1,8 +0,0 @@ - - -
    - - - -
    ***
    ***
    - \ No newline at end of file diff --git a/htmlparser/tests/html/35806.html b/htmlparser/tests/html/35806.html deleted file mode 100644 index b5d421a704dc..000000000000 --- a/htmlparser/tests/html/35806.html +++ /dev/null @@ -1,7 +0,0 @@ - - -last text diff --git a/htmlparser/tests/html/Table05h.html b/htmlparser/tests/html/Table05h.html deleted file mode 100644 index 5bdd4132fdf0..000000000000 --- a/htmlparser/tests/html/Table05h.html +++ /dev/null @@ -1,12 +0,0 @@ -tr terminates caption -
    ') - - - - - - - diff --git a/htmlparser/tests/html/40713.html b/htmlparser/tests/html/40713.html deleted file mode 100644 index 3b28ca529d89..000000000000 --- a/htmlparser/tests/html/40713.html +++ /dev/null @@ -1,19 +0,0 @@ - - -

    - - - -
    SHOULD BE TIMES ROMAN
    -

    - - - SHOULD BE HELVETICA - - - - - - - - diff --git a/htmlparser/tests/html/40809_CR.html b/htmlparser/tests/html/40809_CR.html deleted file mode 100644 index 6f0b6312a5ed..000000000000 --- a/htmlparser/tests/html/40809_CR.html +++ /dev/null @@ -1 +0,0 @@ - Rob Kaper's Capsi | Beyond Paradise

      Capsi | Beyond Paradise  
    Welcome to Capsi! This website is divided into the following sections:

    Section Description
    Personal Aptly named because it's all about me
    Code Various programs and utilities I have written
    CapsiChat My haven, or chat-box
    Starcrush Chicks and babes, lads and dudes.


    2000.05.19 Mission Back to Dot-Com

    I have made up my mind: I will drop the capsi.cx domain! Everything will move back to capsi.com and the Capsi services in Maryland will be hosted under the usa.capsi.com subdomain. Until the capsi.cx domain expires in February 2001 all requests will be forwarded to the proper capsi.com locations. I expect to make the big move in June or July.


    Please visit Capsi's sponsor
    [Please visit Capsi's sponsor]
    \ No newline at end of file diff --git a/htmlparser/tests/html/40809_LF.html b/htmlparser/tests/html/40809_LF.html deleted file mode 100644 index 26cdf143cd93..000000000000 --- a/htmlparser/tests/html/40809_LF.html +++ /dev/null @@ -1,259 +0,0 @@ - - - -Rob Kaper's Capsi | Beyond Paradise - - - - - - - - - -
      Capsi | Beyond Paradise  
    - -
    -Welcome to Capsi! This website is divided into the following sections: -

    -
    -
    - - - - - - - - - - - - - - - - -
    SectionDescription
    PersonalAptly named because it's all about me
    CodeVarious programs and utilities I have written
    CapsiChatMy haven, or chat-box
    StarcrushChicks and babes, lads and dudes.
    -
    -
    -

    -
    -
    - - - -
    2000.05.19 -Mission Back to Dot-Com -

    -I have made up my mind: I will drop the capsi.cx domain! Everything will -move back to capsi.com and the Capsi services in Maryland will be hosted -under the usa.capsi.com subdomain. Until the capsi.cx domain expires in -February 2001 all requests will be forwarded to the proper capsi.com -locations. I expect to make the big move in June or July. -

    -
    -
    -


    Please visit Capsi's sponsor
    [Please visit Capsi's sponsor]
    -
    - - diff --git a/htmlparser/tests/html/40855.html b/htmlparser/tests/html/40855.html deleted file mode 100644 index e6fc8197a84f..000000000000 --- a/htmlparser/tests/html/40855.html +++ /dev/null @@ -1,10 +0,0 @@ - - - -
    -This should be on all one line... but it isn't -
    - -
    - - \ No newline at end of file diff --git a/htmlparser/tests/html/43678.html b/htmlparser/tests/html/43678.html deleted file mode 100644 index 5ebed96c3be8..000000000000 --- a/htmlparser/tests/html/43678.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - -

    - Inside P but before TABLE - - - - -
    cell
    - After TABLE but inside P - - - - diff --git a/htmlparser/tests/html/44479.html b/htmlparser/tests/html/44479.html deleted file mode 100644 index 7b2a1e1ee673..000000000000 --- a/htmlparser/tests/html/44479.html +++ /dev/null @@ -1,27 +0,0 @@ -BUTTON in SPAN in SPAN / SPAN in BUTTON in SPAN - - - -

    BUTTON in SPAN in SPAN

    - - - - - -

    Text after button

    - -
    - -

    SPAN in BUTTON in SPAN

    - - - -

    Text after button

    - - \ No newline at end of file diff --git a/htmlparser/tests/html/44791.html b/htmlparser/tests/html/44791.html deleted file mode 100644 index d6a9aa7c607c..000000000000 --- a/htmlparser/tests/html/44791.html +++ /dev/null @@ -1,18 +0,0 @@ - - -

    -

    - - Your options on how to apply for the Job are:
    -
  • Fax your resume/statement of qualifications and cover letter -
  • -
    -
    - - - Your options on how to apply for the Job are:
    -
  • Fax your resume/statement of qualifications and cover letter -
  • -
    - - diff --git a/htmlparser/tests/html/47535.html b/htmlparser/tests/html/47535.html deleted file mode 100644 index 0887173b99e7..000000000000 --- a/htmlparser/tests/html/47535.html +++ /dev/null @@ -1,9 +0,0 @@ - - -
    - - -
    - - diff --git a/htmlparser/tests/html/48256.html b/htmlparser/tests/html/48256.html deleted file mode 100644 index 9cf04590cea6..000000000000 --- a/htmlparser/tests/html/48256.html +++ /dev/null @@ -1,32 +0,0 @@ - - - -Color Inheritance Test - - - - -

    P - Span O - A - Span I - - -

    -

    P - A - Span - -

    - - - diff --git a/htmlparser/tests/html/48351.html b/htmlparser/tests/html/48351.html deleted file mode 100644 index 4b52c6cefe12..000000000000 --- a/htmlparser/tests/html/48351.html +++ /dev/null @@ -1,13 +0,0 @@ -Mozilla Tests: An XHTML Document - - - - -

    An XHTML Document

    -

    This document is being parsed as XHTML.

    -

    This should not be emphasised text.

    -

    This should be a second paragraph.

    - \ No newline at end of file diff --git a/htmlparser/tests/html/50050.html b/htmlparser/tests/html/50050.html deleted file mode 100644 index ea6a14da7e36..000000000000 --- a/htmlparser/tests/html/50050.html +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - -
    - Single <DIV> -


    - -
    -
    -
    - Nested <DIV>s -
    -


    - - - diff --git a/htmlparser/tests/html/51161.html b/htmlparser/tests/html/51161.html deleted file mode 100644 index cd3c4d13270f..000000000000 --- a/htmlparser/tests/html/51161.html +++ /dev/null @@ -1,7 +0,0 @@ - - - -

    text text text text

    - \ No newline at end of file diff --git a/htmlparser/tests/html/53112.html b/htmlparser/tests/html/53112.html deleted file mode 100644 index a5a77e6bab1b..000000000000 --- a/htmlparser/tests/html/53112.html +++ /dev/null @@ -1,13 +0,0 @@ - - - - Mozilla: Stylesheet linking without "type" attribute - - - - -

    I should be green.

    - - diff --git a/htmlparser/tests/html/54651.html b/htmlparser/tests/html/54651.html deleted file mode 100644 index 644fb5778d29..000000000000 --- a/htmlparser/tests/html/54651.html +++ /dev/null @@ -1,37 +0,0 @@ -Child Selectors Test - - - - - -

    Below this box, every box should be within another. In other - words, the only SIBLINGS in this document should be this box and the - immediate next one. In particular, LI should allow the inner DIV to - nest within it.

    - -
    - DIV -
      -
    1. - LI -
      - DIV -

      - P -

      -
      -
    2. -
    -
    - - \ No newline at end of file diff --git a/htmlparser/tests/html/54845.html b/htmlparser/tests/html/54845.html deleted file mode 100644 index f13c1a055bba..000000000000 --- a/htmlparser/tests/html/54845.html +++ /dev/null @@ -1,37 +0,0 @@ - - - - - - -Start of Body Content - - - - -
    - -
    - - - - diff --git a/htmlparser/tests/html/56245_1.html b/htmlparser/tests/html/56245_1.html deleted file mode 100644 index d3ddd7e81236..000000000000 --- a/htmlparser/tests/html/56245_1.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - -italized text unknown - - - - -
    - cell 1 -
    - - - - diff --git a/htmlparser/tests/html/56245_2.html b/htmlparser/tests/html/56245_2.html deleted file mode 100644 index cc69db2c99a8..000000000000 --- a/htmlparser/tests/html/56245_2.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - -
    - Cell-1 -
    -
    - - - - - - diff --git a/htmlparser/tests/html/56245_3.html b/htmlparser/tests/html/56245_3.html deleted file mode 100644 index 676a70fb64d1..000000000000 --- a/htmlparser/tests/html/56245_3.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - red unknown -
    div a - - div b -
    - normal text - -
    - Cell-2 -
    - - - - diff --git a/htmlparser/tests/html/58455.html b/htmlparser/tests/html/58455.html deleted file mode 100644 index b5fc562d07da..000000000000 --- a/htmlparser/tests/html/58455.html +++ /dev/null @@ -1,12 +0,0 @@ - - - Computer Market Online - \ - - - - - -
    This shows
    This does not show
    - -x \ No newline at end of file diff --git a/htmlparser/tests/html/58809.html b/htmlparser/tests/html/58809.html deleted file mode 100644 index 1ee5225a409e..000000000000 --- a/htmlparser/tests/html/58809.html +++ /dev/null @@ -1,12 +0,0 @@ - - - 'A'
    - 'A'
    - 'b'
    - 'c'
    - 'd'
    - 'e'
    - 'f'
    - - - diff --git a/htmlparser/tests/html/6148.html b/htmlparser/tests/html/6148.html deleted file mode 100644 index 7965768f5c9e..000000000000 --- a/htmlparser/tests/html/6148.html +++ /dev/null @@ -1,29 +0,0 @@ - - -Bug - - - -BUG:
    - -<SPAN> -
    <DIV> -<SPAN> -</SPAN> -</DIV>
    -</SPAN>
    - -
    - -NO BUG:
    - -<SPAN> -
    <DIV> -<B> -</B> -</DIV>
    -</SPAN>
    - - - - diff --git a/htmlparser/tests/html/65467.html b/htmlparser/tests/html/65467.html deleted file mode 100644 index 04ea13953610..000000000000 --- a/htmlparser/tests/html/65467.html +++ /dev/null @@ -1,18 +0,0 @@ - - - -Bug: 65467 - - - - -
    -
    The center tag's border "should" go around this text.
    - - - - - - diff --git a/htmlparser/tests/html/67569.html b/htmlparser/tests/html/67569.html deleted file mode 100644 index fab5bc74b053..000000000000 --- a/htmlparser/tests/html/67569.html +++ /dev/null @@ -1,14 +0,0 @@ - - -

    - - - - -
    - First line -

    - Second line -
    - - diff --git a/htmlparser/tests/html/67874.html b/htmlparser/tests/html/67874.html deleted file mode 100644 index 83348defdc4d..000000000000 --- a/htmlparser/tests/html/67874.html +++ /dev/null @@ -1,4 +0,0 @@ - - Did you know that your Browser is buggy: it can't parse -comments correctly. --> - diff --git a/htmlparser/tests/html/69576.html b/htmlparser/tests/html/69576.html deleted file mode 100644 index 8108370fc7be..000000000000 --- a/htmlparser/tests/html/69576.html +++ /dev/null @@ -1,33 +0,0 @@ - - - -Row,Col - - - - - -
    - 1,1 - - 1,2 - - - - - - - - - - -
    - 1,1,1 - - 1,1,2 -
    - 1,2,2 -
    -
    - - diff --git a/htmlparser/tests/html/70148.html b/htmlparser/tests/html/70148.html deleted file mode 100644 index f76390c0a8c0..000000000000 --- a/htmlparser/tests/html/70148.html +++ /dev/null @@ -1,10 +0,0 @@ - - - - testcase - - - All the stuff after - &lang got munched. - - diff --git a/htmlparser/tests/html/77352.html b/htmlparser/tests/html/77352.html deleted file mode 100644 index 8816a629f734..000000000000 --- a/htmlparser/tests/html/77352.html +++ /dev/null @@ -1,12 +0,0 @@ - - - - size +2 -

    - PPPP -

    - H6blah -
    - size +2 - - diff --git a/htmlparser/tests/html/77746.html b/htmlparser/tests/html/77746.html deleted file mode 100644 index a042fa144c18..000000000000 --- a/htmlparser/tests/html/77746.html +++ /dev/null @@ -1,3 +0,0 @@ - -

    bold red

    neither bold nor red - \ No newline at end of file diff --git a/htmlparser/tests/html/78444.html b/htmlparser/tests/html/78444.html deleted file mode 100644 index 9c1388243940..000000000000 --- a/htmlparser/tests/html/78444.html +++ /dev/null @@ -1,15 +0,0 @@ - - - -coldplay - - - - - - - -<body bgcolor="#FFFFFF"> - -</body> - diff --git a/htmlparser/tests/html/78848.html b/htmlparser/tests/html/78848.html deleted file mode 100644 index efcb487946de..000000000000 --- a/htmlparser/tests/html/78848.html +++ /dev/null @@ -1,117 +0,0 @@ - - -Testcase that cause hangup - - - - - - - - -
    - - - - - - -
    - - - - - -
    - - - - - - -
    - - - - -
      - -
    - - - - -
      - -
    - - - - - -
      - -
    - - - - -
      - -
    - - - - - - - -On this site : - -
      - -
    -
    - - -
    - -
    - -
    - -
    - - - \ No newline at end of file diff --git a/htmlparser/tests/html/79492.html b/htmlparser/tests/html/79492.html deleted file mode 100644 index adde95f204c4..000000000000 --- a/htmlparser/tests/html/79492.html +++ /dev/null @@ -1,8 +0,0 @@ - -PG&E &E: &e. &e; &&e &&e; - - -

    Title should look like this:

    -

    PG&E &E: &e. &e; &&e &&e;

    - - diff --git a/htmlparser/tests/html/84000.html b/htmlparser/tests/html/84000.html deleted file mode 100644 index 62065b975a6b..000000000000 --- a/htmlparser/tests/html/84000.html +++ /dev/null @@ -1,12 +0,0 @@ - - -IMG without attribute - - - - - -<img src="mozilla.gif"> - - - diff --git a/htmlparser/tests/html/84491.html b/htmlparser/tests/html/84491.html deleted file mode 100644 index c2b2638c8ada..000000000000 --- a/htmlparser/tests/html/84491.html +++ /dev/null @@ -1,8 +0,0 @@ - - -
    - foo - - diff --git a/htmlparser/tests/html/88746.html b/htmlparser/tests/html/88746.html deleted file mode 100644 index f66e99721075..000000000000 --- a/htmlparser/tests/html/88746.html +++ /dev/null @@ -1,16 +0,0 @@ - - -Debian Planet - - - - - - - - - - - - diff --git a/htmlparser/tests/html/88992.html b/htmlparser/tests/html/88992.html deleted file mode 100644 index ab1757698fe2..000000000000 --- a/htmlparser/tests/html/88992.html +++ /dev/null @@ -1,15 +0,0 @@ - - - -⟨ - - - -
    & -
    div

    p<I>I</div></I> -
    - - italic - This text doesn't show up.! - - diff --git a/htmlparser/tests/html/91051.html b/htmlparser/tests/html/91051.html deleted file mode 100644 index 5ae9ced6e43d..000000000000 --- a/htmlparser/tests/html/91051.html +++ /dev/null @@ -1,6 +0,0 @@ - -This should be visible. -

    -This is a new paragraph. - - diff --git a/htmlparser/tests/html/92530.html b/htmlparser/tests/html/92530.html deleted file mode 100644 index 4f12c42d795f..000000000000 --- a/htmlparser/tests/html/92530.html +++ /dev/null @@ -1,34 +0,0 @@ - - - - Bugscape Bug 5040 - - - - - - - -
    -
    - - - - - - - -
    - - CONTENT PROMOTED ABOVE INNER TABLE - - - -
    ROW 1 CELL 1
    -
    ROW 2 CELL 1
    -
    -
    - - diff --git a/htmlparser/tests/html/93365.html b/htmlparser/tests/html/93365.html deleted file mode 100644 index 524945b1c6b9..000000000000 --- a/htmlparser/tests/html/93365.html +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - -this text is above a table. - -
    -this text is inside a table. -
    - -this text is below a table. - - - \ No newline at end of file diff --git a/htmlparser/tests/html/94208.html b/htmlparser/tests/html/94208.html deleted file mode 100644 index c9cacb2d28a4..000000000000 --- a/htmlparser/tests/html/94208.html +++ /dev/null @@ -1,9 +0,0 @@ - - - - blue bold,blue bold italic, - not blue but bold and italic, - red bold and italic, - red and italic but not bold,red not bold not italic - - diff --git a/htmlparser/tests/html/96130.html b/htmlparser/tests/html/96130.html deleted file mode 100644 index ae546180d1cb..000000000000 --- a/htmlparser/tests/html/96130.html +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - Should be above the Table - - - -
    - Cell -
    - diff --git a/htmlparser/tests/html/Bug1203.html b/htmlparser/tests/html/Bug1203.html deleted file mode 100644 index bd84bbe5c6a7..000000000000 --- a/htmlparser/tests/html/Bug1203.html +++ /dev/null @@ -1,29 +0,0 @@ - - - How To Give Good Feedback - -

    - Peters test
    - Peters test
    - - - - - -
    - - - - How To Give Good Feedback - - -
    -

    first appeared: Fast Company issue 17 page 144

    -

    - Improving Performance Reviews
    -


    - So you think you've had it with performance reviews? - A few years ago, Chris Oster's unit at General Motors got so fed up with traditional reviews that it abolished them. -
    - - diff --git a/htmlparser/tests/html/Bug1220.html b/htmlparser/tests/html/Bug1220.html deleted file mode 100644 index 38f12713548c..000000000000 --- a/htmlparser/tests/html/Bug1220.html +++ /dev/null @@ -1,33 +0,0 @@ - - - - - - - - - - - - - - -
    - -
    xxx
    - - \ No newline at end of file diff --git a/htmlparser/tests/html/Bug1239.html b/htmlparser/tests/html/Bug1239.html deleted file mode 100644 index 215dd499d53b..000000000000 --- a/htmlparser/tests/html/Bug1239.html +++ /dev/null @@ -1,44 +0,0 @@ - - - -bug1239 -Next...

    - - -
    - - - Centered arial size=1 [BREAK] -
    - -
    - - - - Bold Arial font size=2 - -

    - Maroon Arial font size=1 -
    - - Bold Arial font size=2 - - [ Arial font size=1 ] -
    - -

    Centered Arial size=1 -

    -

    Centered Arial size=1 -

    - - Bold Arial size=2 -
    -
    - - Centered Arial size=1 -

    -
    -
    - -
    - diff --git a/htmlparser/tests/html/Entity.html b/htmlparser/tests/html/Entity.html deleted file mode 100644 index c84831fc6d8e..000000000000 --- a/htmlparser/tests/html/Entity.html +++ /dev/null @@ -1,10 +0,0 @@ - - - - Welcome to MSN.com - - - - fidelity - - diff --git a/htmlparser/tests/html/ListGen.pl b/htmlparser/tests/html/ListGen.pl deleted file mode 100755 index c8c71a27c4e4..000000000000 --- a/htmlparser/tests/html/ListGen.pl +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/perl - -use Cwd; - -$curr_dir=`cd`; - -open(OUTFILE,">file_list.txt") || die "Can't open file_list.txt $!"; -opendir(D,"."); - -@files=readdir(D); -$curr_dir=~s/\\/\//g; -chomp($curr_dir); - -foreach $file(@files) { - if($file=~m/\.*m/) { - print OUTFILE "$file\n"; - } -} - - diff --git a/htmlparser/tests/html/README b/htmlparser/tests/html/README deleted file mode 100644 index f267cb0b8014..000000000000 --- a/htmlparser/tests/html/README +++ /dev/null @@ -1,9 +0,0 @@ -To run parser regression test do the following: - -1) perl ListGen.pl ( this will create file_list.txt ) -2) make ( build TestParser.exe ) -3) perl TestParser.pl -b ://mozilla/dist/bin file_list.txt - ( creates base line files. Note: Should be run _without_ your changes ) -4) apply your patch. -5) perl TestParser.pl -v ://mozilla/dist/bin file_list.txt - ( creates .v files and compares against base line. ) diff --git a/htmlparser/tests/html/RTestDTD.pl b/htmlparser/tests/html/RTestDTD.pl deleted file mode 100644 index 70c11158c1e7..000000000000 --- a/htmlparser/tests/html/RTestDTD.pl +++ /dev/null @@ -1,94 +0,0 @@ -#/usr/bin/perl - -die "\nUsage: perl RtestDTD.pl [FLAG] url_list.txt. - -FLAGS: - -v = verify ( would create rtest_html.veri ) - -b = baseline ( would create rtest_html.base ) - -help = info. on running the script\n" -if( @ARGV[0]!~/-help/ && ((@ARGV >2 || @ARGV < 2) || ($ARGV[0]!~m/-v/ && $ARGV[0]!~m/-b/))); - -if($ARGV[0]=~m/-help/) { - print " - 1. \"url_list.txt\" can be generated by running UrlGen.pl - Ex. 
perl UrlGen.pl. - 2. Run base line (-b) before making changes in your tree. - This will generate rtest_html.base file [DO NOT CHANGE THIS]. - 3. Run verification (-v) after making changes in your tree. - This will generate rtest_html.veri file and will compare - rtest_html.base and rtest_html.veri. - -NOTE: Need perl version 5.0 or greater. \n"; - exit(0); -} - -use Cwd; - -$ENV{"PARSER_DUMP_CONTENT"}=1; - -($drive,@path)=split(/:/,`cd`); - -system("$drive:\\mozilla\\dist\\win32_d.obj\\bin\\viewer.exe -f $ARGV[1]"); - -if($ARGV[0]=~m/-b/) { - rename("rtest_html.txt","rtest_html.base"); -} -elsif($ARGV[0]=~m/-v/) { - rename("rtest_html.txt","rtest_html.veri"); - @result=CompareFiles("rtest_html.base","rtest_html.veri"); - Display(@result); -} - -sub CompareFiles { - - open(BASE,"<$_[0]") || die "Can't output $_[0] $!"; - open(VERI,"<$_[1]") || die "Can't output $_[1] $!"; - - #Separate file contents into URL and DOCUMENT - - while() { - ($url,$document1)=split/;/; - push(@url1,$url); - push(@documents1,$document1); - } - while() { - ($url,$document2)=split/;/; - push(@url2,$url); - push(@documents2,$document2); - } - - #Search for documents that don't match - - for($i=0;$i<=$#documents1+1;$i++) { - if($documents1[$i] !~ /$documents2[$i]/) { - (@string1)=split(//,$documents1[$i]); - (@string2)=split(//,$documents2[$i]); - for($j=0;$j<=$#string1;$j++) { - # Find the character that failed - if($string1[$j] !~ /$string2[$j]/) { - push(@result,$url1[$i],$string2[$j],$j); - $j=@string1; # Stop looping - } - } - } - } - close(BASE); - close(VERI); - - return @result; -} - - -sub Display { - for($i=0;$i<@_;$i++) { - print "\n $_[$i++] failed on character '$_[$i++]' at location $_[$i]"; - } - print "\n--------------------\n"; - $count=@_/3; - if(($count)>0) { - print "$count url(s) failed\n"; - } - else { - print "\nALL SUCCEEDED\n"; - } -} diff --git a/htmlparser/tests/html/Table01.html b/htmlparser/tests/html/Table01.html deleted file mode 100644 index 
c320ee150d91..000000000000 --- a/htmlparser/tests/html/Table01.html +++ /dev/null @@ -1,11 +0,0 @@ -first text - - - -
    This is the first cell in the table - This is the second cell in the table -
    This is the third cell in the table - This is the fourth cell in the table -
    - -last text diff --git a/htmlparser/tests/html/Table02.html b/htmlparser/tests/html/Table02.html deleted file mode 100644 index 6ca6807fd8e8..000000000000 --- a/htmlparser/tests/html/Table02.html +++ /dev/null @@ -1,14 +0,0 @@ -nested table, good syntax. - - - -
    This is the first cell in the table - This is the second cell in the table - - -
    inner cell -
    -
    This is the third cell in the table - This is the fourth cell in the table -
    - diff --git a/htmlparser/tests/html/Table03.html b/htmlparser/tests/html/Table03.html deleted file mode 100644 index 6a617ae20d4d..000000000000 --- a/htmlparser/tests/html/Table03.html +++ /dev/null @@ -1,11 +0,0 @@ -first text - - - -
    This is the first cell in the table - This is the second cell in the table -
    This is the third cell in the table - This is the fourth cell in the table -
    - -last text diff --git a/htmlparser/tests/html/Table04.html b/htmlparser/tests/html/Table04.html deleted file mode 100644 index 67b747b93b4d..000000000000 --- a/htmlparser/tests/html/Table04.html +++ /dev/null @@ -1,53 +0,0 @@ -first text -
      -
    • -list line 1 - -
    • -list line 2 - - - -
      This is the first cell in the table -

      paragraph in cell. -

      This is the second cell in the table -
        -
      • list item1 in cell -
      • list item2 in cell -
          -
      This is the third cell in the table - This is the fourth cell in the table -
      - -
    • -list line 3 -

      paragraph in list line 3. -

    - -table aligned: -
      -
    • -list line 1 - -
    • -list line 2 - - - -
      This is the first cell in the table -

      paragraph in cell. -

      This is the second cell in the table -
        -
      • list item1 in cell -
      • list item2 in cell -
          -
      This is the third cell in the table - This is the fourth cell in the table -
      - -
    • -list line 3 -

      paragraph in list line 3. -

    - -last text diff --git a/htmlparser/tests/html/Table05.html b/htmlparser/tests/html/Table05.html deleted file mode 100644 index 66006e503ca7..000000000000 --- a/htmlparser/tests/html/Table05.html +++ /dev/null @@ -1,12 +0,0 @@ -The only syntax for Caption is after table start tag. - - - - -
    caption1
    cell 1-1 - cell 1-2 -
    cell 2-1 - cell 2-2 -
    - -last text diff --git a/htmlparser/tests/html/Table05e.html b/htmlparser/tests/html/Table05e.html deleted file mode 100644 index b8707a8145de..000000000000 --- a/htmlparser/tests/html/Table05e.html +++ /dev/null @@ -1,21 +0,0 @@ -Multiple caption elements, all outside td cells. -
    Nav 4.0: the first caption is accepted, -Others are treated as text, and moved before the table. -
    Xena6.0: ignore misplaced caption tag. contents treated as text. - - TextAA - TextA - TextB - - TextC - - TextD - - TextE - - TextF -
    caption1caption2
    caption3
    cell 1-1
    caption4
    cell 1-2 -
    caption5
    cell 2-1 - cell 2-2
    caption6
    - -last text diff --git a/htmlparser/tests/html/Table05f.html b/htmlparser/tests/html/Table05f.html deleted file mode 100644 index e6f4b49d678f..000000000000 --- a/htmlparser/tests/html/Table05f.html +++ /dev/null @@ -1,16 +0,0 @@ -Multiple Captions inside cells, -
    Nav. 4.0: last one takes effect, and close td tr. - previous captions are totally ignored. - If there were no caption3, caption2 would take effect. -
    Xena 6.0: Ignore misplaced caption tag, contents treated as text. - - - - TextA - - TextB -
    caption1
    cell 1-1
    caption2
    cell 1-2 -
    cell 2-1 - cell 2-2
    caption3
    - -last text diff --git a/htmlparser/tests/html/Table05g.html b/htmlparser/tests/html/Table05g.html deleted file mode 100644 index 2ca55ca0aa6b..000000000000 --- a/htmlparser/tests/html/Table05g.html +++ /dev/null @@ -1,13 +0,0 @@ -Captions outside of table are treated as text. -
    caption1
    - - -
    cell 1-1 - cell 1-2 -
    cell 2-1 - cell 2-2 -
    - -
    caption2
    - - -
    caption1 -
    cell 1-1 - cell 1-2 -
    cell 2-1 - cell 2-2 -
    - -last text diff --git a/htmlparser/tests/html/Table05i.html b/htmlparser/tests/html/Table05i.html deleted file mode 100644 index 105e4ab9e554..000000000000 --- a/htmlparser/tests/html/Table05i.html +++ /dev/null @@ -1,13 +0,0 @@ -tr terminates Caption, and close open(font)tags in caption. -
    also terminates those tags. In this case, the Font tag. - - - -
    caption1
    text1 text2 -
    cell 1-1 - cell 1-2 -
    cell 2-1 - cell 2-2 -
    - -last text diff --git a/htmlparser/tests/html/Table05j.html b/htmlparser/tests/html/Table05j.html deleted file mode 100644 index cd5c67421065..000000000000 --- a/htmlparser/tests/html/Table05j.html +++ /dev/null @@ -1,11 +0,0 @@ -TD does NOT terminate Caption. TD's contents are used in caption. - - -
    caption1
    text1 text2 -
    cell 1-1 - cell 1-2 -
    cell 2-1 - cell 2-2 no br in caption. -
    - -last text diff --git a/htmlparser/tests/html/Table06.html b/htmlparser/tests/html/Table06.html deleted file mode 100644 index 422da6c13598..000000000000 --- a/htmlparser/tests/html/Table06.html +++ /dev/null @@ -1,13 +0,0 @@ -extra end table tag is ignored. - - - -
    This is the first cell in the table - This is the second cell in the table -
    This is the third cell in the table - This is the fourth cell in the table -
    - - - -last text diff --git a/htmlparser/tests/html/Table_illegal_1.html b/htmlparser/tests/html/Table_illegal_1.html deleted file mode 100644 index 8e7b151c4d69..000000000000 --- a/htmlparser/tests/html/Table_illegal_1.html +++ /dev/null @@ -1,62 +0,0 @@ - - - Bad Content - - - - - - - - -
    -

    some text - two

    - - -

    ss -

    - -
    three
    - - - -

    one -

    -

    one last -

    -
    - -

    last -

    -

    two -

    -

    two last -

    -
    - - -

    three -

    -

    three last -

    -
    - - - - -
    - 4TD -
    -
    -
    -
    - - - diff --git a/htmlparser/tests/html/Table_illegal_2.html b/htmlparser/tests/html/Table_illegal_2.html deleted file mode 100644 index 6423ec9df0d6..000000000000 --- a/htmlparser/tests/html/Table_illegal_2.html +++ /dev/null @@ -1,41 +0,0 @@ - - - Bad Content - - - - -

    redhellothere -

    -
    - - -

    blue -

    -
    - - - - - - - - - - - diff --git a/htmlparser/tests/html/list002.html b/htmlparser/tests/html/list002.html deleted file mode 100644 index f5c31fb40792..000000000000 --- a/htmlparser/tests/html/list002.html +++ /dev/null @@ -1,15 +0,0 @@ - -Welcome to Hewlett-Packard - - - -This file was created on 9/16/97 for bug #85117, assert in parser -when closing dd tag. - - -abcd efg - - - - - diff --git a/htmlparser/tests/html/list003.html b/htmlparser/tests/html/list003.html deleted file mode 100644 index c47e5ab50945..000000000000 --- a/htmlparser/tests/html/list003.html +++ /dev/null @@ -1,19 +0,0 @@ - - - -text line 1 - -Text after font color=red -
      -
    • first list item -

      still in list item. -

    • . second list item. - ... -
    - -Text after list, still in font color=red -
    -Text after font color=red. -text at tend. - - \ No newline at end of file diff --git a/htmlparser/tests/html/newlines.html b/htmlparser/tests/html/newlines.html deleted file mode 100644 index a949cc70e902..000000000000 --- a/htmlparser/tests/html/newlines.html +++ /dev/null @@ -1,3 +0,0 @@ -#1: This is a line of input terminated by a \n -#2: This is another line of input terminated by a \r\n -#3: This is a line of input terminated by a \r #4: This is the last line diff --git a/htmlparser/tests/html/nulltest.html b/htmlparser/tests/html/nulltest.html deleted file mode 100644 index d6502b6f8ead..000000000000 Binary files a/htmlparser/tests/html/nulltest.html and /dev/null differ diff --git a/htmlparser/tests/html/obj001.html b/htmlparser/tests/html/obj001.html deleted file mode 100644 index 823b1939d430..000000000000 --- a/htmlparser/tests/html/obj001.html +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - Java applet that plays a welcoming sound. - - - diff --git a/htmlparser/tests/html/obj002.html b/htmlparser/tests/html/obj002.html deleted file mode 100644 index b0805ba8eae6..000000000000 --- a/htmlparser/tests/html/obj002.html +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/htmlparser/tests/html/obj003.html b/htmlparser/tests/html/obj003.html deleted file mode 100644 index e7cb0f03ee26..000000000000 --- a/htmlparser/tests/html/obj003.html +++ /dev/null @@ -1,17 +0,0 @@ - - - - - object element with no attributes. - object element with lang attribute set to en. - object element with dir attribute set to ltr. - - param element with no attributes:
    - -
    - - param element with the name attribute and an end tag:
    - -
    - - diff --git a/htmlparser/tests/html/option.html b/htmlparser/tests/html/option.html deleted file mode 100644 index 8eb71a815b35..000000000000 --- a/htmlparser/tests/html/option.html +++ /dev/null @@ -1,9 +0,0 @@ - - - -
    - - -

    green -

    -

    orange -

    - - - diff --git a/htmlparser/tests/html/TestParser.cpp b/htmlparser/tests/html/TestParser.cpp deleted file mode 100644 index a849222eb1a8..000000000000 --- a/htmlparser/tests/html/TestParser.cpp +++ /dev/null @@ -1,133 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. 
If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "nsXPCOM.h" -#include "nsIComponentManager.h" -#include "nsParserCIID.h" -#include "nsIParser.h" -#include "nsILoggingSink.h" -#include "nsIInputStream.h" - -// Class IID's -static NS_DEFINE_CID(kParserCID, NS_PARSER_CID); -static NS_DEFINE_CID(kLoggingSinkCID, NS_LOGGING_SINK_CID); -static NS_DEFINE_CID(kNavDTDCID, NS_CNAVDTD_CID); - -//---------------------------------------------------------------------- - -nsresult ParseData(char* anInputStream,char* anOutputStream) { - NS_ENSURE_ARG_POINTER(anInputStream); - NS_ENSURE_ARG_POINTER(anOutputStream); - - nsresult result = NS_OK; - - // Create a parser - nsCOMPtr parser(do_CreateInstance(kParserCID, &result)); - if (NS_FAILED(result)) { - printf("\nUnable to create a parser\n"); - return result; - } - // Create a sink - nsCOMPtr sink(do_CreateInstance(kLoggingSinkCID, &result)); - if (NS_FAILED(result)) { - printf("\nUnable to create a sink\n"); - return result; - } - // Create a dtd - nsCOMPtr dtd(do_CreateInstance(kNavDTDCID, &result)); - if(NS_FAILED(result)) { - printf("Unable to create a dtd\n"); - return result; - } - - PRFileDesc* in = PR_Open(anInputStream, PR_RDONLY, 777); - if (!in) { - printf("\nUnable to open input file - %s\n", anInputStream); - return result; - } - - PRFileDesc* out = PR_Open(anOutputStream, PR_CREATE_FILE|PR_WRONLY, 777); - if (!out) { - printf("\nUnable to open output file - %s\n", anOutputStream); - return result; - } - - nsString stream; - char buffer[1024] = {0}; // XXX Yikes! 
- PRBool done = PR_FALSE; - PRInt32 length = 0; - while(!done) { - length = PR_Read(in, buffer, sizeof(buffer)); - if (length != 0) { - stream.AppendWithConversion(buffer, length); - } - else { - done=PR_TRUE; - } - } - - sink->SetOutputStream(out); - parser->RegisterDTD(dtd); - parser->SetContentSink(sink); - result = parser->Parse(stream, 0, NS_LITERAL_CSTRING("text/html"), PR_FALSE, PR_TRUE); - - PR_Close(in); - PR_Close(out); - - return result; -} - - -//--------------------------------------------------------------------- - -int main(int argc, char** argv) -{ - if (argc < 3) { - printf("\nUsage: \n"); - return -1; - } - - nsresult rv = NS_InitXPCOM2(nsnull, nsnull, nsnull); - if (NS_FAILED(rv)) { - printf("NS_InitXPCOM2 failed\n"); - return -1; - } - - ParseData(argv[1],argv[2]); - - return 0; -} diff --git a/htmlparser/tests/html/TestParser.pl b/htmlparser/tests/html/TestParser.pl deleted file mode 100755 index 1097887f24c9..000000000000 --- a/htmlparser/tests/html/TestParser.pl +++ /dev/null @@ -1,37 +0,0 @@ -#! /usr/bin/perl - - use Cwd; - - die "\nUsage: perl TestParser.pl [-b|-v] - b -> create baseline - v -> verify changes - bin-path -> Ex. 
://mozilla/dist/bin - filelist -> Run ListGen.pl which will yield file_list.txt\n" - - if(@ARGV < 3 || @ARGV > 3); - - open(FILE_LIST,$ARGV[2]) || die "\nCannot open $ARGV[2]\n"; - - if($ARGV[0] eq "-b") { - foreach $input() { - $input =~s/\n//g; - @output=split(/\./,$input); - print "\n$input\n"; - system("$ARGV[1]/TestParser.exe $input $output[0].b"); - } - } - elsif($ARGV[0] eq "-v") { - foreach $input() { - $input =~s/\n//g; - @output=split(/\./,$input); - print "\n$input\n"; - system("$ARGV[1]/TestParser.exe $input $output[0].v"); - system("diff -u $output[0].b $output[0].v"); - } - } - else { - print "\n\"$ARGV[0]\" unknown....\n"; - print "\nUsage: perl TestParser.pl [-b|-v] \n\n"; - } - - close(FILE_LIST); diff --git a/htmlparser/tests/html/UrlGen.pl b/htmlparser/tests/html/UrlGen.pl deleted file mode 100644 index c3f728b1271a..000000000000 --- a/htmlparser/tests/html/UrlGen.pl +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/perl - -use Cwd; - -$curr_dir=`cd`; - -open(OUTFILE,">url_list.txt") || die "Can't open url.txt $!"; -opendir(D,"."); - -@files=readdir(D); -$curr_dir=~s/\\/\//g; -chomp($curr_dir); - -foreach $file(@files) { - if($file=~m/\.htm/) { - print OUTFILE "file:///$curr_dir/$file\n"; - } -} - - diff --git a/htmlparser/tests/html/acronym1.html b/htmlparser/tests/html/acronym1.html deleted file mode 100644 index fddd4c2ffffc..000000000000 --- a/htmlparser/tests/html/acronym1.html +++ /dev/null @@ -1,14 +0,0 @@ - - -test ACRONYM tag - - -test Acronym
    -1normal text in body line 1. - -A This text is in Acronym -
    B line 2 in Acronym -
    -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/aname01.html b/htmlparser/tests/html/aname01.html deleted file mode 100644 index 97535b87f0de..000000000000 --- a/htmlparser/tests/html/aname01.html +++ /dev/null @@ -1,17 +0,0 @@ - - - - - -
    Note! - -
    - -
    - - - -This is from http://margaux/manual-proxy-20/cfgservr.htm - - - diff --git a/htmlparser/tests/html/atoi01.html b/htmlparser/tests/html/atoi01.html deleted file mode 100644 index ccbf33542eaa..000000000000 --- a/htmlparser/tests/html/atoi01.html +++ /dev/null @@ -1,13 +0,0 @@ -first text - size '3' - size '_4' - size '_5z' - size ' 4%' - size '_5_' - size '+2' - size '_+2' - size '+_3' - size '_+3' - size -1 - -last text diff --git a/htmlparser/tests/html/attribute_quote_bug1.html b/htmlparser/tests/html/attribute_quote_bug1.html deleted file mode 100644 index a2544d037d04..000000000000 --- a/htmlparser/tests/html/attribute_quote_bug1.html +++ /dev/null @@ -1,13 +0,0 @@ - -

    Same text as in "); - document.write("hello there"); - //--> - - -Done - - - diff --git a/htmlparser/tests/html/bigtxt.html b/htmlparser/tests/html/bigtxt.html deleted file mode 100644 index ff44cce8c354..000000000000 --- a/htmlparser/tests/html/bigtxt.html +++ /dev/null @@ -1,3701 +0,0 @@ - -

    -RFC788 - - - - - - - - SIMPLE MAIL TRANSFER PROTOCOL - - - - Jonathan B. Postel - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - November 1981 - - - - Information Sciences Institute - University of Southern California - 4676 Admiralty Way - Marina del Rey, California 90291 - - (213) 822-1511 - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - TABLE OF CONTENTS - - 1. INTRODUCTION .................................................. 1 - - 2. THE SMTP MODEL ................................................ 2 - - 3. THE SMTP PROCEDURE ............................................ 4 - - 3.1. Mail ..................................................... 4 - 3.2. Forwarding ............................................... 7 - 3.3. Verifying and Expanding .................................. 8 - 3.4. Sending and Mailing ..................................... 10 - 3.5. Opening and Closing ..................................... 12 - 3.6. Relaying ................................................ 13 - 3.7. Domains ................................................. 15 - - 4. THE SMTP SPECIFICATIONS ...................................... 16 - - 4.1. SMTP Commands ........................................... 16 - 4.1.1. Command Semantics ..................................... 16 - 4.1.2. Command Syntax ........................................ 23 - 4.2. SMTP Replies ............................................ 28 - 4.2.1. Reply Codes by Function Group ......................... 29 - 4.2.2. Reply Codes in Numeric Order .......................... 30 - 4.3. Sequencing of Commands and Replies ...................... 31 - 4.4. State Diagrams .......................................... 33 - 4.5. Details ................................................. 35 - 4.5.1. Minimum Implementation ................................ 35 - 4.5.2. Transparency .......................................... 35 - 4.5.3. Sizes ................................................. 
36 - - APPENDIX A: TCP ................................................. 38 - APPENDIX B: NCP ................................................. 39 - APPENDIX C: NITS ................................................ 40 - APPENDIX D: X.25 ................................................ 41 - APPENDIX E: Theory of Reply Codes ............................... 42 - APPENDIX F: Scenarios ........................................... 45 - - GLOSSARY ......................................................... 58 - - REFERENCES ....................................................... 61 - - -Network Working Group J. Postel -Request for Comments: 788 ISI -Replaces: RFC 780, 772 November 1981 - - SIMPLE MAIL TRANSFER PROTOCOL - - -1. INTRODUCTION - - The objective of Simple Mail Transfer Protocol (SMTP) is to transfer - mail reliably and efficiently. - - SMTP is independent of the particular transmission subsystem and - requires only a reliable ordered data stream channel. Appendices A, - B, C, and D describe the use of SMTP with various transport services. - A Glossary provides the definitions of terms as used in this - document. - - An important feature of SMTP is its capability to relay mail across - transport service environments. A transport service provides an - interprocess communication environment (IPCE). An IPCE may cover one - network, several networks, or a subset of a network. It is important - to realize that transport systems (or IPCEs) are not one-to-one with - networks. A process can communicate directly with another process - through any mutually known IPCE. Mail is an application or use of - interprocess communication. Mail can be communicated between - processes in different IPCEs by relaying through a process connected - to two (or more) IPCEs. More specifically, mail can be relayed - between hosts on different transport systems by a host on both - transport systems. 
- - - - - - - - - - - - - - - - - - - - - - - - -Postel [Page 1] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - -2. THE SMTP MODEL - - The SMTP design is based on the following model of communication: as - the result of a user mail request, the sender-SMTP establishes a - full-duplex transmission channel to a receiver-SMTP. The - receiver-SMTP may be either the ultimate destination or an - intermediate. SMTP commands are generated by the sender-SMTP and - sent to the receiver-SMTP. SMTP replies are sent from the - receiver-SMTP to the sender-SMTP in response to the commands. - - Once the transmission channel is established, the SMTP-sender sends a - MAIL command indicating the sender of the mail. If the SMTP-receiver - can accept mail it responds with an OK reply. The SMTP-sender then - sends a RCPT command identifying a recipient of the mail. If the - SMTP-receiver can accept mail for that recipient it responds with an - OK reply; if not, it responds with a reply rejecting that recipient - (but not the whole mail transaction). The SMTP-sender and - SMTP-receiver may negotiate several recipients. When the recipients - have been negotiated the SMTP-sender sends the mail data, terminating - with a special sequence. If the SMTP-receiver successfully processes - the mail data it responds with an OK reply. The dialog is purposely - lock-step, one-at-a-time. 
- - ------------------------------------------------------------- - - - +----------+ +----------+ - +------+ | | | | - | User |<-->| | SMTP | | - +------+ | Sender- |Commands/Replies| Receiver-| - +------+ | SMTP |<-------------->| SMTP | +------+ - | File |<-->| | and Mail | |<-->| File | - |System| | | | | |System| - +------+ +----------+ +----------+ +------+ - - - Sender-SMTP Receiver-SMTP - - Model for SMTP Use - - Figure 1 - - ------------------------------------------------------------- - - The SMTP provides mechanisms for the transmission of mail; directly - from the sending user's host to the receiving user's host when the - - - -[Page 2] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - two host are connected to the same transport service, or via one or - more relay SMTP-servers when the source and destination hosts are not - connected to the same transport service. - - To be able to provide the relay capability the SMTP-server must be - supplied with the name of the ultimate destination host as well as - the destination mailbox name. - - The argument to the MAIL command is a reverse-path, which specifies - who the mail is from. The argument to the RCPT command is a - forward-path, which specifies who the mail is to. The forward-path - is a source route while the reverse-path, is a return route (which - may be used to return a message to the sender when an error occurs - with a relayed message). - - When the same message is sent to multiple recipients the SMTP - encourages the transmission of only one copy of the data for all the - recipients at the same destination host. - - The mail commands and replies have a rigid syntax. Replies also have - a numeric code. In the following, examples appear which use actual - commands and replies. The complete lists of commands and replies - appears in Section 4 on specifications. - - Commands and replies are not case sensitive. 
That is, a command or - reply word may be upper case, lower case, or any mixture of upper and - lower case. Note that this is not true of mailbox user names. For - some hosts the user name is case sensitive, and SMTP implementations - must take care to preserve the case of user names as they appear in - mailbox arguments. Host names are not case sensitive. - - Commands and replies are composed of characters from the ASCII - character set [1]. Each 7-bit character is transmitted right - justified in an 8-bit byte (or octet) with the high order bit cleared - to zero. - - When specifying the general form of a command or reply, an argument - (or special symbol) will be denoted by a meta-linguistic variable (or - constant), for example, "" or "". Here the - angle brackets indicate these are meta-linguistic variables. - However, some arguments use the angle brackets literally. For - example, an actual reverse-path is enclosed in angle brackets, i.e., - "" is an instance of <reverse-path> (the angle brackets - are actually transmitted in the command or reply). - - - - - -Postel [Page 3] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - -3. THE SMTP PROCEDURES - - This section presents the procedures used in SMTP in several parts. - First comes the basic mail procedure defined as a mail transaction. - Following this are descriptions of forwarding mail, verifying mailbox - names and expanding mailing lists, sending to terminals instead of or - in combination with mailboxes, and the opening and closing exchanges. - At the end of this section are comments on relaying, and a note on - mail domains. Throughout this section are examples of partial - command and reply sequences; several complete scenarios are presented - in Appendix F. - - 3.1. MAIL - - There are three steps to a SMTP mail transaction. The transaction - is started with a MAIL command which gives the sender - identification. A series of one or more RCPT commands follow - giving the receiver information.
Then a DATA command gives the - mail data. And finally, the end of mail data indicator confirms - the transaction. - - The first step in the procedure is the MAIL command. The - <reverse-path> contains the source mailbox. - - MAIL FROM: - - This command tells the SMTP-receiver that a new mail - transaction is starting and to reset all its state tables and - buffers including any recipients or mail data. It gives the - reverse-path which can be used to report errors. If accepted, - the receiver-SMTP returns a 250 OK reply. - - The <reverse-path> can contain more than just a mailbox. The - <reverse-path> is a reverse source routing list of hosts and - source mailbox. The first host in the <reverse-path> should be - the host sending this command. - - The second step in the procedure is the RCPT command. - - RCPT TO: - - This command gives a forward-path identifying one recipient. - If accepted, the receiver-SMTP returns a 250 OK reply, and - stores the forward-path. If the recipient is unknown the - receiver-SMTP returns a 550 Failure reply. This second step of - the procedure can be repeated any number of times. - - - -[Page 4] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - The <forward-path> can contain more than just a mailbox. The - <forward-path> is a source routing list of hosts and - destination mailbox. The first host in the <forward-path> - should be the host receiving this command. - - The third step in the procedure is the DATA command. - - DATA - - If accepted, the receiver-SMTP returns a 354 Intermediate reply - and considers all succeeding lines to be the message text. - When the end of text is received and stored the SMTP-receiver - sends a 250 OK reply. - - Since the mail data is sent on the transmission channel the end - of the mail data must be indicated so that the command and - reply dialog can be resumed. SMTP indicates the end of the - mail data by sending a line containing only a period. A - transparency procedure is used to prevent this interfering with - the user's text (see Section 4.5.2).
- - Please note that the mail data includes the memo header - items such as Date, Subject, To, Cc, From [2]. - - The end of mail data indicator also confirms the mail - transaction and tells the receiver-SMTP to now process the - stored recipients and mail data. If accepted, the - receiver-SMTP returns a 250 OK reply. The DATA command should - fail only if the mail transaction was incomplete (for example, - no recipients), or if resources are not available. - - The above procedure is an example of a SMTP mail transaction. - These commands must be used only in the order discussed above. - Example 1 (below) illustrates the use of these commands in a mail - transaction. - - - - - - - - - - - - - - -Postel [Page 5] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - ------------------------------------------------------------- - - Example of the SMTP Procedure - - This SMTP example shows mail sent by Smith at host Alpha, to - Jones, Green, and Brown at host Beta. Here we assume that host - Alpha contacts host Beta directly. - - S: MAIL FROM: - R: 250 OK - - S: RCPT TO: - R: 250 OK - - S: RCPT TO: - R: 550 No such user here - - S: RCPT TO: - R: 250 OK - - S: DATA - R: 354 Start mail input; end with . - S: Blah blah blah... - S: ...etc. etc. etc. - S: . - R: 250 OK - - The mail has now been accepted for Jones and Brown. Green did - not have a mailbox at host Beta. - - Example 1 - - ------------------------------------------------------------- - - - - - - - - - - - - - - - - -[Page 6] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - 3.2. FORWARDING - - There are some cases where the destination information in the - is incorrect, but the receiver-SMTP knows the - correct destination. In such cases, one the following replies - should be used to allow the sender to contact the correct - destination. 
- - 251 User not local; will forward to <forward-path> - - This reply indicates that the receiver-SMTP knows the user's - mailbox is on another host and indicates the correct - forward-path to use in the future. Note that either the - host or user or both may be different. The receiver takes - responsibility for delivering the message. - - 551 User not local; please try <forward-path> - - This reply indicates that the receiver-SMTP knows the user's - mailbox is on another host and indicates the correct - forward-path to use. Note that either the host or user or - both may be different. The receiver refuses to accept mail - for this user, and the sender must either redirect the mail - according to the information provided or return an error - response to the originating user. - - Example 2 illustrates the use of these responses. - - ------------------------------------------------------------- - - Example of Forwarding - - Either - - S: RCPT TO: - R: 251 User not local; will forward to - - Or - - S: RCPT TO: - R: 551 User not local; please try - - Example 2 - - ------------------------------------------------------------- - - - - -Postel [Page 7] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - 3.3. VERIFYING AND EXPANDING - - SMTP provides as additional features, commands to verify a user - name or expand a mailing list. This is done with the VRFY and - EXPN commands, which have character string arguments. For the - VRFY command, the string is a user name, and the response may - include the full name of the user and must include the mailbox of - the user. For the EXPN command, the string identifies a mailing - list, and the multiline response may include the full name of the - users and must give the mailboxes on the mailing list. - - The case of verifying a user name is straightforward as shown in - example 3.
- - ------------------------------------------------------------- - - Example of Verifying a User Name - - Either - - S: VRFY Postel - R: 250 Jon Postel - - Or - - S: VRFY Jones - R: 550 String does not match anything. - - Example 3 - - ------------------------------------------------------------- - - - - - - - - - - - - - - - - - - -[Page 8] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - The case of expanding a mailbox list requires a multiline reply as - shown in example 4. - - ------------------------------------------------------------- - - Example of Expanding a Mailing List - - Either - - S: EXPN Example-People - R: 250-Jon Postel - R: 250-Fred Fonebone - R: 250-Sam Q. Smith - R: 250-Quincy Smith <@ISIF,Q-Smith@ISI-VAXA> - R: 250- - R: 250 - - Or - - S: EXPN Executive-Washroom-List - R: 550 Access Denied to You. - - Example 4 - - ------------------------------------------------------------- - - The character string arguments of the VRFY and EXPN commands - cannot be further restricted due to the variety of implementations - of the user name and mailbox list concepts. On some systems it - may be appropriate for the argument of the EXPN command to be a - file name for a file containing a mailing list, but again there is - a variety of file naming conventions in the internet. - - The VRFY and EXPN commands are not included in the minimum - implementation (Section 4.5.1), and are not required to work - across relays when they are implemented. - - - - - - - - - - - - - -Postel [Page 9] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - 3.4. SENDING AND MAILING - - The main purpose of SMTP is to deliver messages to user's - mailboxes. A very similar service provided by some hosts is to - deliver messages to user's terminals (provided the user is active - on the host). The delivery to the user's mailbox is called - "mailing", the delivery to the user's terminal is called - "sending". 
Because in many hosts the implementation of sending is - nearly identical to the implementation of mailing these two - functions are combined in SMTP. However the sending commands are - not included in the required minimum implementation - (Section 4.5.1). Users should have the ability to control the - writing of messages on their terminals. Most hosts permit the - users to accept or refuse such messages. - - The following three commands are defined to support the sending - options; these are used in the mail transaction instead of the - MAIL command and inform the receiver-SMTP of the special semantics - of this transaction: - - SEND FROM: - - The SEND command requires that the mail data be delivered to - the user's terminal. If the user is not active (or not - accepting terminal messages) on the host a 450 reply may be - returned to a RCPT command. The mail transaction is - successful if the message is delivered to the terminal. - - SOML FROM: - - The Send Or MaiL command requires that the mail data be - delivered to the user's terminal if the user is active (and - accepting terminal messages) on the host. If the user is - not active (or not accepting terminal messages) then the - mail data is entered into the user's mailbox. The mail - transaction is successful if the message is delivered either - to the terminal or the mailbox. - - SAML FROM: - - The Send And MaiL command requires that the mail data be - delivered to the user's terminal if the user is active (and - accepting terminal messages) on the host. In any case the - mail data is entered into the user's mailbox. The mail - transaction is successful if the message is delivered to the - mailbox. - - - -[Page 10] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - The same reply codes that are used for the MAIL commands are used - for these commands.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Postel [Page 11] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - 3.5. OPENING AND CLOSING - - At the time the transmission channel is opened there is an - exchange to ensure that the hosts are communicating with the hosts - they think they are. - - The following two commands are used in transmission channel - opening and closing: - - HELO - - QUIT - - In the HELO command the host sending the command identifies - itself; the command may be interpreted as saying "Hello, i am - ". - - ------------------------------------------------------------- - - Example of Connection Opening - - R: 220 BBN-UNIX Simple Mail Transfer Service Ready - S: HELO USC-ISIF - R: 250 BBN-UNIX - - Example 5 - - ------------------------------------------------------------- - - ------------------------------------------------------------- - - Example of Connection Closing - - S: QUIT - R: 221 BBN-UNIX Service closing transmission channel - - Example 6 - - ------------------------------------------------------------- - - - - - - - - - - -[Page 12] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - 3.6. RELAYING - - The forward-path may be a source route of the form - "@ONE,@TWO,JOE@THREE", where ONE, TWO, and THREE are hosts. This - form is used to emphasize the distinction between an address and a - route. The mailbox is an absolute address, and the route is - information about how to get there. The two concepts should not - be confused. - - The elements of the forward-path are moved to the reverse-path as - the message is relayed from one server-SMTP to another. The - reverse-path is a reverse source route, (i.e., a source route from - the current location of the message to the originator of the - message). 
When a server-SMTP deletes its identifier from the - forward-path and inserts it into the reverse-path, it must use the - name it is known by in the environment it is sending into, not the - environment the mail came from, in case the server-SMTP is known - by different names in different environments. - - Using source routing the receiver-SMTP receives mail to be relayed - to another server-SMTP. The receiver-SMTP may accept or reject the - task of relaying the mail in the same way it accepts or rejects - mail for a local user. The receiver-SMTP transforms the command - arguments by moving its own identifier from the forward-path to - the beginning of the reverse-path. The receiver-SMTP then becomes - a sender-SMTP, establishes a transmission channel to the next SMTP - in the forward-path, and sends it the mail. - - The first host in the reverse-path should be the host sending the - SMTP commands, and the first host in the forward-path should be - the host receiving the SMTP commands. - - Notice that the forward-path and reverse-path appear in the SMTP - commands and replies, but not necessarily in the message. That - is, there is no need for these paths and especially this syntax to - appear in the "To:", "From:", "CC:", etc. fields of the message - header. - - If a server-SMTP has accepted the task of relaying the mail and - later finds that the forward-path is incorrect or that the mail - cannot be delivered for whatever reason, then it must construct an - "undeliverable mail" notification message and send it to the - originator of the undeliverable mail (as indicated by the - reverse-path). - - - - - -Postel [Page 13] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - This notification message must be from the server-SMTP at this - host. Of course, server-SMTPs should not send notification - messages about problems with notification messages.
One way to - prevent loops in error reporting is to specify a null reverse-path - in the MAIL command of a notification message. When such a - message is relayed it is permissible to leave the reverse-path - null. A MAIL command with a null reverse-path appears as follows: - - MAIL FROM:<> - - An undeliverable mail notification message is shown in example 7. - This notification is in response to a message originated by JOE at - HOSTW and sent via HOSTX to HOSTY with instructions to relay it on - to HOSTZ. What we see in the example is the transaction between - HOSTY and HOSTX, which is the first step in the return of the - notification message. - - ------------------------------------------------------------- - - Example Undeliverable Mail Notification Message - - S: MAIL FROM:<> - R: 250 ok - S: RCPT TO:<@HOSTX,JOE@HOSTW> - R: 250 ok - S: DATA - R: 354 send the mail data, end with . - S: Date: 23 Oct 81 - S: Sender: SMTP@HOSTY - S: Subject: Mail System Problem - S: - S: Sorry JOE, your message to SAM@HOSTZ lost. - S: HOSTZ said this: - S: "550 No Such User" - S: . - R: 250 ok - - Example 7 - - ------------------------------------------------------------- - - - - - - - - - -[Page 14] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - 3.7. DOMAINS - - At some not too distant future time it might be necessary to - expand the mailbox format to include a region or name domain - identifier. There is quite a bit of discussion on this at - present, and is likely that SMTP will be revised in the future to - take into account naming domains. - - The examples in this document do not show mail domains. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Postel [Page 15] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - -4. THE SMTP SPECIFICATIONS - - 4.1. SMTP COMMANDS - - 4.1.1. COMMAND SEMANTICS - - The SMTP commands define the mail transfer or the mail system - function requested by the user. 
SMTP commands are character - strings terminated by . The command codes themselves are - alphabetic characters terminated by if parameters follow - and otherwise. The syntax of mailboxes must conform to - receiver site conventions. The SMTP commands are discussed - below. The SMTP replies are discussed in the Section 4.2. - - A mail transaction involves several data objects which are - communicated as arguments to different commands. The - reverse-path is the argument of the MAIL command, the - forward-path is the argument of the RCPT command, and the mail - data is the argument of the DATA command. These arguments or - data objects must be transmitted and held pending the - confirmation communicated by the end of mail data indication - which finalizes the transaction. The model for this is that - distinct buffers are provided to hold the types of data - objects, that is, there is a reverse-path buffer, a - forward-path buffer, and a mail data buffer. Specific commands - cause information to be appended to a specific buffer, or cause - one or more buffers to be cleared. - - HELLO (HELO) - - This command is used to identify the sender-SMTP to the - receiver-SMTP. The argument field contains the host name of - the sender-SMTP. - - The receiver-SMTP identifies itself to the sender-SMTP in - the connection greeting reply, and in the response to this - command. - - MAIL (MAIL) - - This command is used to initiate a mail transaction in which - the mail data is delivered to one or more mailboxes. The - argument field contains a reverse-path. - - The reverse-path consists of an optional list of hosts and - the sender mailbox. When the list of hosts is present, it - - - -[Page 16] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - is a "reverse" source route and indicates that the mail was - relayed through each host on the list (the first host in the - list was the most recent relay). 
This list is used as a - source route to return non-delivery notices to the sender. - As each relay host adds itself to the beginning of the list, - it must use its name as known in the IPCE to which it is - relaying the mail rather than the IPCE from which the mail - came (if they are different). In some types of error - reporting messages (for example, undeliverable mail - notifications) the reverse-path may be null (see Example 7). - - This command clears the reverse-path buffer, the - forward-path buffer, and the mail data buffer; and inserts - the reverse-path information from this command into the - reverse-path buffer. - - RECIPIENT (RCPT) - - This command is used to identify an individual recipient of - the mail data; multiple recipients are specified by multiple - use of this command. - - The forward-path consists of an optional list of hosts and a - required destination mailbox. When the list of hosts is - present, it is a source route and indicates that the mail - must be relayed to the next host on the list. If the - receiver-SMTP does not implement the relay function it - may use the same reply it would for an unknown local user - (550). - - When mail is relayed, the relay host must remove itself from - the beginning of the forward-path and put itself at the beginning - of the reverse-path. When mail reaches its ultimate - destination (the forward-path contains only a destination - mailbox), the receiver-SMTP inserts it into the destination - mailbox in accordance with its host mail conventions. - - For example, mail received at relay host A with arguments - - FROM:<X@Y> - TO:<@A,@B,C@D> - - will be relayed on to host B with arguments - - FROM:<@A,X@Y> - TO:<@B,C@D>. - - - -Postel [Page 17] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - This command causes its forward-path argument to be appended - to the forward-path buffer. - - DATA (DATA) - - The receiver treats the lines following the command as mail - data from the sender.
This command causes the mail data - from this command to be appended to the mail data buffer. - The mail data may contain any of the 128 ASCII character - codes. - - The mail data is terminated by a line containing only a - period, that is the character sequence "." (see - Section 4.5.2 on Transparency). This is the end of mail - data indication. - - The end of mail data indication requires that the receiver - must now process the stored mail transaction information. - This processing consumes the information in the reverse-path - buffer, the forward-path buffer, and the mail data buffer, - and on the completion of this command these buffers are - cleared. If the processing is successful the receiver must - send an OK reply. If the processing fails completely the - receiver must send a failure reply. - - When the receiver-SMTP accepts a message either for relaying - or for final delivery it inserts at the beginning of the - mail data a time stamp line. The time stamp line indicates - the identity of the host that sent the message, and the - identity of the host that received the message (and is - inserting this time stamp), and the date and time the - message was received. Relayed messages will have multiple - time stamp lines. - - When the receiver-SMTP makes the "final delivery" of a - message it inserts at the beginning of the mail data a - return path line. The return path line preserves the - information in the from the MAIL command. - Here, final delivery means the message leaves the SMTP - world. Normally, this would mean it has been delivered to - the destination user, but in some cases it may be further - processed and transmitted by another mail system. - - The preceding two paragraphs imply that the final mail data - - - - - -[Page 18] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - will begin with a return path line, followed by one or more - time stamp lines. 
These lines will be followed by the mail - data header and body [2]. For example: - - Return-Path: <@GHI,@DEF,@ABC,JOE@ABC> - Mail-From: GHI received by JKL at 27-Oct-81 15:27:39-PST - Mail-From: DEF received by GHI at 27-Oct-81 15:15:13-PST - Mail-From: ABC received by DEF at 27-Oct-81 15:01:59-PST - Date: 27-Oct-81 15:01:01-PST - From: JOE@ABC - Subject: Improved Mailing System Installed - To: SAM@JKL - - This is to inform you that ... - - Special mention is needed of the response and further action - required when the processing following the end of mail data - indication is partially successful. This could arise if - after accepting several recipients and the mail data, the - receiver-SMTP finds that the mail data can be successfully - delivered to some of the recipients, but it cannot be to - others (for example, due to mailbox space allocation - problems). In such a situation, the response to the DATA - command must be an OK reply. But, the receiver-SMTP must - compose and send an "undeliverable mail" notification - message to the originator of the message. Either a single - notification which lists all of the recipients that failed - to get the message, or separate notification messages must - be sent for each failed recipient (see Example 7). All - undeliverable mail notification messages are sent using the - MAIL command (even if they result from processing a SEND, - SOML, or SAML command). - - SEND (SEND) - - This command is used to initiate a mail transaction in which - the mail data is delivered to one or more terminals. The - argument field contains a reverse-path. This command is - successful if the message is delivered to the terminal. - - The reverse-path consists of an optional list of hosts and - the sender mailbox. When the list of hosts is present, it - is a "reverse" source route and indicates that the mail was - relayed through each host on the list (the first host in the - list was the most recent relay). 
This list is used as a - source route to return non-delivery notices to the sender. - - - -Postel [Page 19] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - As each relay host adds itself to the beginning of the list, - it must use its name as known in the IPCE to which it is - relaying the mail rather than the IPCE from which the mail - came (if they are different). - - This command clears the reverse-path buffer, the - forward-path buffer, and the mail data buffer; and inserts - the reverse-path information from this command into the - reverse-path buffer. - - SEND OR MAIL (SOML) - - This command is used to initiate a mail transaction in which - the mail data is delivered to one or more terminals or - mailboxes. For each recipient the mail data is delivered to - the recipient's terminal if the recipient is active on the - host (and accepting terminal messages), otherwise to the - recipient's mailbox. The argument field contains a - reverse-path. This command is successful if the message is - delivered to the terminal or the mailbox. - - The reverse-path consists of an optional list of hosts and - the sender mailbox. When the list of hosts is present, it - is a "reverse" source route and indicates that the mail was - relayed through each host on the list (the first host in the - list was the most recent relay). This list is used as a - source route to return non-delivery notices to the sender. - As each relay host adds itself to the beginning of the list, - it must use its name as known in the IPCE to which it is - relaying the mail rather than the IPCE from which the mail - came (if they are different). - - This command clears the reverse-path buffer, the - forward-path buffer, and the mail data buffer; and inserts - the reverse-path information from this command into the - reverse-path buffer. 
- - SEND AND MAIL (SAML) - - This command is used to initiate a mail transaction in which - the mail data is delivered to one or more terminals and - mailboxes. For each recipient the mail data is delivered to - the recipient's terminal if the recipient is active on the - host (and accepting terminal messages), and for all - - - - - -[Page 20] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - recipients to the recipient's mailbox. The argument field - contains a reverse-path. This command is successful if the - message is delivered to the mailbox. - - The reverse-path consists of an optional list of hosts and - the sender mailbox. When the list of hosts is present, it - is a "reverse" source route and indicates that the mail was - relayed through each host on the list (the first host in the - list was the most recent relay). This list is used as a - source route to return non-delivery notices to the sender. - As each relay host adds itself to the beginning of the list, - it must use its name as known in the IPCE to which it is - relaying the mail rather than the IPCE from which the mail - came (if they are different). - - This command clears the reverse-path buffer, the - forward-path buffer, and the mail data buffer; and inserts - the reverse-path information from this command into the - reverse-path buffer. - - RESET (RSET) - - This command specifies that the current mail transaction is - to be aborted. Any stored sender, recipients, and mail data - must be discarded, and all buffers and state tables cleared. - The receiver must send an OK reply. - - VERIFY (VRFY) - - This command asks the receiver to confirm that the argument - identifies a user. If it is a user name, the full name of - the user (if known) and the fully specified mailbox are - returned. - - This command has no effect on any of the reverse-path - buffer, the forward-path buffer, or the mail data buffer. 
- - EXPAND (EXPN) - - This command asks the receiver to confirm that the argument - identifies a mailing list, and if so, to return the - membership of that list. The full name of the users (if - known) and the fully specified mailboxes are returned in a - multiline reply. - - - - - -Postel [Page 21] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - This command has no effect on any of the reverse-path - buffer, the forward-path buffer, or the mail data buffer. - - HELP (HELP) - - This command causes the receiver to send helpful information - to the sender of the HELP command. The command may take an - argument (e.g., any command name) and return more specific - information as a response. - - This command has no effect on any of the reverse-path - buffer, the forward-path buffer, or the mail data buffer. - - NOOP (NOOP) - - This command does not affect any parameters or previously - entered commands. It specifies no action other than that - the receiver send an OK reply. - - This command has no effect on any of the reverse-path - buffer, the forward-path buffer, or the mail data buffer. - - QUIT (QUIT) - - This command specifies that the receiver must send an OK - reply, and then close the transmission channel. - - The receiver should not close the transmission channel until - it receives and replies to a QUIT command (even if there was - an error). The sender should not close the transmission - channel until it sends a QUIT command and receives the reply - (even if there was an error response to a previous command). - If the connection is closed prematurely the receiver should - act as if a RSET command had been received (canceling any - pending transaction, but not undoing any previously - completed transaction); the sender should act as if the - command or transaction in progress had received a temporary - error (4xx). - - There are restrictions on the order in which these commands may - be used.
- - The first command in a session must be the HELO command. - The HELO command may be used later in a session as well. - - - - - -[Page 22] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - The NOOP, HELP, EXPN, and VRFY commands can be used at any - time during a session. - - The MAIL, SEND, SOML, or SAML commands begin a mail - transaction. Once started a mail transaction consists of - one of the transaction beginning commands, one or more RCPT - commands, and a DATA command, in that order. A mail - transaction may be aborted by the RSET command. There may - be zero or more transactions in a session. - - The last command in a session must be the QUIT command. The - QUIT command can not be used at any other time in a session. - - 4.1.2. COMMAND SYNTAX - - The commands consist of a command code followed by an argument - field. Command codes are four alphabetic characters. Upper - and lower case alphabetic characters are to be treated - identically. Thus, any of the following may represent the mail - command: - - MAIL Mail mail MaIl mAIl - - This also applies to any symbols representing parameter values, - such as "TO" or "to" for the forward-path. Command codes and - the argument fields are separated by one or more spaces. - However, within the reverse-path and forward-path arguments - case is important. In particular, in some hosts the user - "smith" is different from the user "Smith". - - The argument field consists of a variable length character - string ending with the character sequence . The receiver - is to take no action until this sequence is received. - - Square brackets denote an optional argument field. If the - option is not taken, the appropriate default is implied. 
- - - - - - - - - - - - - -Postel [Page 23] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - The following are the SMTP commands: - - HELO - - MAIL FROM: - - RCPT TO: - - DATA - - RSET - - SEND FROM: - - SOML FROM: - - SAML FROM: - - VRFY - - EXPN - - HELP [ ] - - NOOP - - QUIT - - - - - - - - - - - - - - - - - - - - - - -[Page 24] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - The syntax of the above argument fields (using BNF notation - where applicable) is given below. The "..." notation indicates - that a field may be repeated one or more times. - - ::= - - ::= - - ::= "<" ["@" "," ...] ">" - - ::= | "#" | "[" "]" - - ::= "@" - - ::= - - ::= | - - ::= | '\' | '\' - - ::= "." "." "." - - ::= | - - ::= three digits representing a decimal integer value - in the range 0 through 255 - - ::= any one of the 52 alphabetic characters A through Z - in upper case and a through z in lower case - - ::= any one of the 128 ASCII characters except - - - ::= any one of the ten digits 0 through 9 - - ::= any one of - - ::= '<', '>', '(', ')', '\', ',', ';', ':', '@', - '"', and the control characters (ASCII codes 0 through 37 - octal inclusive and 177 octal) - - Note that the backslash, '\', is a quote character, which is - used to indicate that the next character is to be used - literally (instead of its normal interpretation). For example, - "Joe\,Smith" could be used to indicate a single nine character - user field with comma being the fourth character of the field. - - - -Postel [Page 25] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - Hosts are generally known by names which are translated to - addresses in each host. Sometimes a host is not known to the - translation function and communication is blocked. To bypass - this barrier two numeric forms are also allowed for host - "names". One form is a decimal integer prefixed by a pound - sign, "#", which indicates the number is the address of the - host. 
Another form is four small decimal integers separated by - dots and enclosed by brackets, e.g., "[123.255.37.2]", which - indicates a 32-bit ARPA Internet Address in four 8-bit fields. - - The time stamp line and the return path line are formally - defined as follows: - - ::= "Return-Path:" - - ::= "Mail-From:" - - ::= [] - - ::= "host" - - ::= - - ::= "received by" - - <protocol> ::= "TCP" | "NCP" | "NITS" | "X25" | "INTERNET" | - "ARPANET" - - Note: INTERNET = TCP, ARPANET = NCP, and if the <protocol> is - not present INTERNET is assumed. - - <daytime> ::= "at" <SP> <date> <SP> <time> - -
    <date> ::= <dd> "-" <mon> "-" <yy> - -
    <time> ::= <hh> ":" <mm> ":" <ss> "-" <zone> - - <dd>
    ::= the one or two decimal integer day of the month in - the range 1 to 31. - - ::= "JAN" | "FEB" | "MAR" | "APR" | "MAY" | "JUN" | - "JUL" | "AUG" | "SEP" | "OCT" | "NOV" | "DEC" - - ::= the two decimal integer year of the century in the - range 01 to 99. - - - - -[Page 26] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - ::= the two decimal integer hour of the day in the - range 00 to 24. - - ::= the two decimal integer minute of the hour in the - range 00 to 59. - - ::= the two decimal integer second of the minute in the - range 00 to 59. - - ::= a time zone designator (as in [2]) or "UT" for - Universal Time (the default). - - Return Path Example: - - Return-Path: <@CHARLIE,@BAKER,JOE@ABLE> - - Mail From Example: - - Mail-From: ABC received by XYZ at 22-OCT-81 09:23:59-PDT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Postel [Page 27] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - 4.2. SMTP REPLIES - - Replies to SMTP commands are devised to ensure the synchronization - of requests and actions in the process of mail transfer, and to - guarantee that the sender-SMTP always knows the state of the - receiver-SMTP. Every command must generate exactly one reply. - - The details of the command-reply sequence are made explicit in - Section 5.3 on Sequencing and Section 5.4 State Diagrams. - - An SMTP reply consists of a three digit number (transmitted as - three alphanumeric characters) followed by some text. The number - is intended for use by automata to determine what state to enter - next; the text is meant for the human user. It is intended that - the three digits contain enough encoded information that the - sender-SMTP need not examine the text and may either discard it or - pass it on to the user, as appropriate. In particular, the text - may be receiver-dependent, so there are likely to be varying texts - for each reply code. 
A discussion of the theory of reply codes is - given in the Appendix E. Formally, a reply is defined to be the - sequence: a three-digit code, , one line of text, and , - or a multiline reply (as defined in Appendix E). Only the EXPN - and HELP command are expected to result in multiline replies in - normal circumstances, however multiline replies are allowed for - any command. - - - - - - - - - - - - - - - - - - - - - - - - -[Page 28] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - 4.2.1. REPLY CODES BY FUNCTION GROUPS - - 500 Syntax error, command unrecognized - [This may include errors such as command line too long] - 501 Syntax error in parameters or arguments - 502 Command not implemented - 503 Bad sequence of commands - 504 Command parameter not implemented - - 211 System status, or system help reply - 214 Help message - [Information on how to use the receiver or the meaning of a - particular non-standard command; this reply is useful only - to the human user] - - 220 Service ready - 221 Service closing transmission channel - 421 Service not available, closing transmission channel - [This may be a reply to any command if the service knows it - must shut down] - - 250 Requested mail action okay, completed - 251 User not local; will forward to - 450 Requested mail action not taken: mailbox unavailable - [E.g., mailbox busy] - 550 Requested action not taken: mailbox unavailable - [E.g., mailbox not found, no access] - 451 Requested action aborted: error in processing - 551 User not local; please try - 452 Requested action not taken: insufficient system storage - 552 Requested mail action aborted: exceeded storage allocation - 553 Requested action not taken: mailbox name not allowed - [E.g., mailbox syntax incorrect] - 354 Start mail input; end with . - 554 Transaction failed - - - - - - - - - - - - - - -Postel [Page 29] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - 4.2.2. 
NUMERIC ORDER LIST OF REPLY CODES - - 211 System status, or system help reply - 214 Help message - [Information on how to use the receiver or the meaning of a - particular non-standard command; this reply is useful only - to the human user] - 220 Service ready - 221 Service closing transmission channel - 250 Requested mail action okay, completed - 251 User not local; will forward to - - 354 Start mail input; end with . - - 421 Service not available, closing transmission channel - [This may be a reply to any command if the service knows it - must shut down] - 450 Requested mail action not taken: mailbox unavailable - [E.g., mailbox busy] - 451 Requested action aborted: local error in processing - 452 Requested action not taken: insufficient system storage - - 500 Syntax error, command unrecognized - [This may include errors such as command line too long] - 501 Syntax error in parameters or arguments - 502 Command not implemented - 503 Bad sequence of commands - 504 Command parameter not implemented - 550 Requested action not taken: mailbox unavailable - [E.g., mailbox not found, no access] - 551 User not local; please try - 552 Requested mail action aborted: exceeded storage allocation - 553 Requested action not taken: mailbox name not allowed - [E.g., mailbox syntax incorrect] - 554 Transaction failed - - - - - - - - - - - - - - -[Page 30] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - 4.3. SEQUENCING OF COMMANDS AND REPLIES - - The communication between the sender and receiver is intended to - be an alternating dialogue, controlled by the sender. As such, - the sender issues a command and the receiver responds with a - reply. The sender must wait for this response before sending - further commands. - - One important reply is the connection greeting. Normally, a - receiver will send a 220 "Awaiting input" reply when the - connection is completed. The sender should wait for this greeting - message before sending any commands. 
- - Note: all the greeting type replies have the official name of - the server host as the first word following the reply code. - - For example, - - 220 USC-ISIF Service ready - - The table below lists alternative success and failure replies for - each command. These must be strictly adhered to; a receiver may - substitute text in the replies, but the meaning and action implied - by the code numbers and by the specific command reply sequence - cannot be altered. - - COMMAND-REPLY SEQUENCES - - Each command is listed with its possible replies. The prefixes - used before the possible replies are "P" for preliminary (not - used in SMTP), "I" for intermediate, "S" for success, "F" for - failure, and "E" for error. The 421 reply (service not - available, closing transmission channel) may be given to any - command if the SMTP-receiver knows it must shut down. This - listing forms the basis for the State Diagrams in Section 4.4. - - CONNECTION ESTABLISHMENT - S: 220 - F: 421 - HELO - S: 250 - E: 500, 501, 504, 421 - MAIL - S: 250 - F: 552, 451, 452 - E: 500, 501, 421 - - - -Postel [Page 31] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - RCPT - S: 250, 251 - F: 550, 551, 552, 553, 450, 451, 452 - E: 500, 501, 421 - DATA - I: 354 -> data -> S: 250 - F: 552, 554, 451, 452 - F: 451, 554 - E: 500, 501, 421 - RSET - S: 250 - E: 500, 501, 504, 421 - SEND - S: 250 - F: 552, 451, 452 - E: 500, 501, 502, 421 - SOML - S: 250 - F: 552, 451, 452 - E: 500, 501, 502, 421 - SAML - S: 250 - F: 552, 451, 452 - E: 500, 501, 502, 421 - VRFY - S: 250 - F: 550 - E: 500, 501, 502, 504, 421 - EXPN - S: 250 - F: 550 - E: 500, 501, 502, 504, 421 - HELP - S: 211, 214 - E: 500, 501, 502, 504, 421 - NOOP - S: 250 - E: 500, 421 - QUIT - S: 221 - E: 500 - - - - - - - - -[Page 32] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - 4.4. STATE DIAGRAMS - - Following are state diagrams for a simple-minded SMTP - implementation. 
Only the first digit of the reply codes is used. - There is one state diagram for each group of SMTP commands. The - command groupings were determined by constructing a model for each - command and then collecting together the commands with - structurally identical models. - - For each command there are three possible outcomes: "success" - (S), "failure" (F), and "error" (E). In the state diagrams below - we use the symbol B for "begin", and the symbol W for "wait for - reply". - - First, the diagram that represents most of the SMTP commands: - - - 1,3 +---+ - ----------->| E | - | +---+ - | - +---+ cmd +---+ 2 +---+ - | B |---------->| W |---------->| S | - +---+ +---+ +---+ - | - | 4,5 +---+ - ----------->| F | - +---+ - - - This diagram models the commands: - - HELO, MAIL, RCPT, RSET, SEND, SOML, SAML, VRFY, EXPN, HELP, - NOOP, QUIT. - - - - - - - - - - - - - - - -Postel [Page 33] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - A more complex diagram models the DATA command: - - - +---+ DATA +---+ 1,2 +---+ - | B |---------->| W |-------------------->| E | - +---+ +---+ ------------>+---+ - 3| |4,5 | - | | | - -------------- ----- | - | | | +---+ - | ---------- -------->| S | - | | | | +---+ - | | ------------ - | | | | - V 1,3| |2 | - +---+ data +---+ --------------->+---+ - | |---------->| W | | F | - +---+ +---+-------------------->+---+ - 4,5 - - - Note that the "data" here is a series of lines sent from the - sender to the receiver with no response expected until the last - line is sent. - - - - - - - - - - - - - - - - - - - - - - - - - -[Page 34] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - 4.5. DETAILS - - 4.5.1. MINIMUM IMPLEMENTATION - - In order to make SMTP workable, the following minimum - implementation is required for all receivers: - - COMMANDS -- HELO - MAIL - RCPT - DATA - RSET - NOOP - QUIT - - 4.5.2. TRANSPARENCY - - Without some provision for data transparency the character - sequence "." 
ends the mail text and cannot be - sent by the user. In general, users are not aware of such - "forbidden" sequences. To allow all user composed text to be - transmitted transparently the following procedures are used. - - 1. Before sending a line of mail text the sender-SMTP checks - the first character of the line. If it is a period, one - additional period is inserted at the beginning of the line. - - 2. When a line of mail text is received by the receiver-SMTP - it checks the line. If the line is composed of a single - period it is the end of mail. If the first character is a - period and there are other characters on the line, the first - character is deleted. - - The mail data may contain any of the 128 ASCII characters. All - characters are to be delivered to the recipient's mailbox - including format effectors and other control characters. The - 7-bit ASCII codes are transmitted right justified in 8-bit - bytes (octets) with the high order bits cleared to zero. - - In some systems it may be necessary to transform the data as - it is received and stored. This may be necessary for hosts - that use a different character set than ASCII as their local - character set, or that store data in records rather than - strings. If such transforms are necessary, they must be - reversible -- especially if such transforms are applied to - mail being relayed. - - - -Postel [Page 35] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - 4.5.3. SIZES - - There are several objects that have required minimum maximum - sizes. That is, every implementation must be able to receive - objects of at least these sizes, but must not send objects - larger than these sizes. - - - **************************************************** - * * - * TO THE MAXIMUM EXTENT POSSIBLE, IMPLEMENTATION * - * TECHNIQUES WHICH IMPOSE NO LIMITS ON THE LENGTH * - * OF THESE OBJECTS SHOULD BE USED.
* - * * - **************************************************** - - user - - The maximum total length of a user name is 64 characters. - - host - - The maximum total length of a host name or number is 40 - characters. - - path - - The maximum total length of a reverse-path or - forward-path is 256 characters (including the punctuation - and element separators). - - command line - - The maximum total length of a command line including the - command word and the is 512 characters. - - reply line - - The maximum total length of a reply line including the - reply code and the is 512 characters. - - text line - - The maximum total length of a text line including the - is 1000 characters (but not counting the leading - dot duplicated for transparency). - - - -[Page 36] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - recipients buffer - - The maximum total number of recipients that must be - buffered is 100 recipients. - - - **************************************************** - * * - * TO THE MAXIMUM EXTENT POSSIBLE, IMPLEMENTATION * - * TECHNIQUES WHICH IMPOSE NO LIMITS ON THE LENGTH * - * OF THESE OBJECTS SHOULD BE USED. * - * * - **************************************************** - - Errors due to exceeding these limits may be reported by using - the reply codes, for example: - - 500 Line too long. - - 501 Path too long - - 552 Too many recipients. - - 552 Too much mail data. - - - - - - - - - - - - - - - - - - - - - - - - - -Postel [Page 37] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - -APPENDIX A - - TCP Transport service - - The Transmission Control Protocol [3] is used in the ARPA - Internet, and in any network following the US DoD standards for - internetwork protocols. - - Connection Establishment - - The SMTP transmission channel is a TCP connection established - between the sender process port U and the receiver process port - L. This single full duplex connection is used as the - transmission channel. 
This protocol is assigned the service - port 25 (31 octal), that is L=25. - - Data Transfer - - The TCP connection supports the transmission of 8-bit bytes. - The SMTP data is 7-bit ASCII characters. Each character is - transmitted as an 8-bit byte with the high-order bit cleared to - zero. - - - - - - - - - - - - - - - - - - - - - - - - - - - - -[Page 38] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - -APPENDIX B - - NCP Transport service - - The ARPANET Host-to-Host Protocol [4] (implemented by the Network - Control Program) may be used in the ARPANET. - - Connection Establishment - - The SMTP transmission channel is established via NCP between - the sender process socket U and receiver process socket L. - The Initial Connection Protocol [5] is followed resulting in a - pair of simplex connections. This pair of connections is used - as the transmission channel. This protocol is assigned the - contact socket 25 (31 octal), that is L=25. - - Data Transfer - - The NCP data connections are established in 8-bit byte mode. - The SMTP data is 7-bit ASCII characters. Each character is - transmitted as an 8-bit byte with the high-order bit cleared to - zero. - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Postel [Page 39] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - -APPENDIX C - - NITS - - The Network Independent Transport Service [6] may be used. - - Connection Establishment - - The SMTP transmission channel is established via NITS between - the sender process and receiver process. The sender process - executes the CONNECT primitive, and the waiting receiver - process executes the ACCEPT primitive. - - Data Transfer - - The NITS connection supports the transmission of 8-bit bytes. - The SMTP data is 7-bit ASCII characters. Each character is - transmitted as an 8-bit byte with the high-order bit cleared to - zero.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -[Page 40] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - -APPENDIX D - - X.25 Transport service - - It may be possible to use the X.25 service [7] as provided by the - Public Data Networks directly, but there are indications that it - is too error prone to qualify as a reliable channel. It is - suggested that a reliable end-to-end protocol such as TCP be used - on top of X.25 connections. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Postel [Page 41] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - -APPENDIX E - - Theory of Reply Codes - - The three digits of the reply each have a special significance. - The first digit denotes whether the response is good, bad or - incomplete. An unsophisticated sender-SMTP will be able to - determine its next action (proceed as planned, redo, retrench, - etc.) by simply examining this first digit. A sender-SMTP that - wants to know approximately what kind of error occurred (e.g., - mail system error, command syntax error) may examine the second - digit, reserving the third digit for the finest gradation of - information. - - There are five values for the first digit of the reply code: - - 1yz Positive Preliminary reply - - The command has been accepted, but the requested action - is being held in abeyance, pending confirmation of the - information in this reply. The sender-SMTP should send - another command specifying whether to continue or abort - the action. - - [Note: SMTP does not have any commands that allow this - type of reply, and so does not have the continue or - abort commands.] - - 2yz Positive Completion reply - - The requested action has been successfully completed. A - new request may be initiated. - - 3yz Positive Intermediate reply - - The command has been accepted, but the requested action - is being held in abeyance, pending receipt of further - information. 
The sender-SMTP should send another command - specifying this information. This reply is used in - command sequence groups. - - 4yz Transient Negative Completion reply - - The command was not accepted and the requested action did - not occur. However, the error condition is temporary and - the action may be requested again. The sender should - - - -[Page 42] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - return to the beginning of the command sequence (if any). - It is difficult to assign a meaning to "transient" when - two different sites (receiver- and sender- SMTPs) must - agree on the interpretation. Each reply in this category - might have a different time value, but the sender-SMTP is - encouraged to try again. A rule of thumb to determine if - a reply fits into the 4yz or the 5yz category (see below) - is that replies are 4yz if they can be repeated without - any change in command form or in properties of the sender - or receiver. (E.g., the command is repeated identically - and the receiver does not put up a new implementation.) - - 5yz Permanent Negative Completion reply - - The command was not accepted and the requested action did - not occur. The sender-SMTP is discouraged from repeating - the exact request (in the same sequence). Even some - "permanent" error conditions can be corrected, so the - human user may want to direct the sender-SMTP to - reinitiate the command sequence by direct action at some - point in the future (e.g., after the spelling has been - changed, or the user has altered the account status). - - The second digit encodes responses in specific categories: - - x0z Syntax -- These replies refer to syntax errors, - syntactically correct commands that don't fit any - functional category, and unimplemented or superfluous - commands. - - x1z Information -- These are replies to requests for - information, such as status or help. - - x2z Connections -- These are replies referring to the - transmission channel. 
- - x3z Unspecified as yet. - - x4z Unspecified as yet. - - x5z Mail system -- These replies indicate the status of - the receiver mail system vis-a-vis the requested - transfer or other mail system action. - - The third digit gives a finer gradation of meaning in each - category specified by the second digit. The list of replies - - - -Postel [Page 43] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - illustrates this. Each reply text is recommended rather than - mandatory, and may even change according to the command with - which it is associated. On the other hand, the reply codes - must strictly follow the specifications in this section. - Receiver implementations should not invent new codes for - slightly different situations from the ones described here, but - rather adapt codes already defined. - - For example, a command such as NOOP whose successful execution - does not offer the sender-SMTP any new information will return - a 250 reply. The response is 502 when the command requests an - unimplemented non-site-specific action. A refinement of that - is the 504 reply for a command that is implemented, but that - requests an unimplemented parameter. - - The reply text may be longer than a single line; in these cases - the complete text must be marked so the sender-SMTP knows when it - can stop reading the reply. This requires a special format to - indicate a multiple line reply. - - The format for multi-line replies requires that every line, - except the last, begin with the reply code, followed - immediately by a hyphen, "-" (also known as minus), followed by - text. The last line will begin with the reply code, followed - immediately by , optionally some text, and . - - For example: - 123-First line - 123-Second line - 123-234 text beginning with numbers - 123 The last line - - The sender-SMTP then simply needs to search for the reply code - followed by at the beginning of a line, and ignore all - preceding lines. 
- - - - - - - - - - - - - - -[Page 44] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - -APPENDIX F - - Scenarios - - This section presents complete scenarios of several types of SMTP - sessions. - - A Typical SMTP Transaction Scenario - - This SMTP example shows mail sent by Smith at host USC-ISIF, to - Jones, Green, and Brown at host BBN-UNIX. Here we assume that - host USC-ISIF contacts host BBN-UNIX directly. The mail is - accepted for Jones and Brown. Green does not have a mailbox at - host BBN-UNIX. - - ------------------------------------------------------------- - - R: 220 BBN-UNIX Simple Mail Transfer Service Ready - S: HELO USC-ISIF - R: 250 BBN-UNIX - - S: MAIL FROM: - R: 250 OK - - S: RCPT TO: - R: 250 OK - - S: RCPT TO: - R: 550 No such user here - - S: RCPT TO: - R: 250 OK - - S: DATA - R: 354 Start mail input; end with . - S: Blah blah blah... - S: ...etc. etc. etc. - S: . - R: 250 OK - - S: QUIT - R: 221 BBN-UNIX Service closing transmission channel - - Scenario 1 - - ------------------------------------------------------------- - - - -Postel [Page 45] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - Aborted SMTP Transaction Scenario - - ------------------------------------------------------------- - - R: 220 MIT-Multics Simple Mail Transfer Service Ready - S: HELO ISI-VAXA - R: 250 MIT-Multics - - S: MAIL FROM: - R: 250 OK - - S: RCPT TO: - R: 250 OK - - S: RCPT TO: - R: 550 No such user here - - S: RSET - R: 250 OK - - S: QUIT - R: 221 MIT-Multics Service closing transmission channel - - Scenario 2 - - ------------------------------------------------------------- - - - - - - - - - - - - - - - - - - - - - - - -[Page 46] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - Relayed Mail Scenario - - ------------------------------------------------------------- - - Step 1 -- Source Host to Relay Host - - R: 220 USC-ISIE Simple Mail Transfer Service Ready - S: HELO MIT-AI - R: 250 
USC-ISIE - - S: MAIL FROM: - R: 250 OK - - S: RCPT TO:<@ISIE,Jones@BBN-VAX> - R: 250 OK - - S: DATA - R: 354 Start mail input; end with . - S: Date: 2-Nov-81 22:33:44 - S: From: John Q. Public - S: Subject: The Next Meeting of the Board - S: To: Jones at BBN-Vax - S: - S: Bill: - S: The next meeting of the board of directors will be - S: on Tuesday. - S: John. - S: . - R: 250 OK - - S: QUIT - R: 221 USC-ISIE Service closing transmission channel - - - - - - - - - - - - - - - - - -Postel [Page 47] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - Step 2 -- Relay Host to Destination Host - - R: 220 BBN-VAX Simple Mail Transfer Service Ready - S: HELO USC-ISIE - R: 250 BBN-VAX - - S: MAIL FROM:<@ISIE,JQP@MIT-AI> - R: 250 OK - - S: RCPT TO: - R: 250 OK - - S: DATA - R: 354 Start mail input; end with . - S: Mail-From: NCP host MIT-AI received by USC-ISIE at - 2-Nov-81 22:40:10 - S: Date: 2-Nov-81 22:33:44 - S: From: John Q. Public - S: Subject: The Next Meeting of the Board - S: To: Jones at BBN-Vax - S: - S: Bill: - S: The next meeting of the board of directors will be - S: on Tuesday. - S: John. - S: . - R: 250 OK - - S: QUIT - R: 221 USC-ISIE Service closing transmission channel - - Scenario 3 - - ------------------------------------------------------------- - - - - - - - - - - - - - - - -[Page 48] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - Verifying and Sending Scenario - - ------------------------------------------------------------- - - R: 220 SU-SCORE Simple Mail Transfer Service Ready - S: HELO MIT-MC - R: 250 SU-SCORE - - S: VRFY Crispin - R: 250 Mark Crispin - - S: SEND FROM: - R: 250 OK - - S: RCPT TO: - R: 250 OK - - S: DATA - R: 354 Start mail input; end with . - S: Blah blah blah... - S: ...etc. etc. etc. - S: . 
- R: 250 OK - - S: QUIT - R: 221 SU-SCORE Service closing transmission channel - - Scenario 4 - - ------------------------------------------------------------- - - - - - - - - - - - - - - - - - - - -Postel [Page 49] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - Sending and Mailing Scenarios - - First the user's name is verified, then an attempt is made to - send to the user's terminal. When that fails, the messages is - mailed to the user's mailbox. - - ------------------------------------------------------------- - - R: 220 SU-SCORE Simple Mail Transfer Service Ready - S: HELO MIT-MC - R: 250 SU-SCORE - - S: VRFY Crispin - R: 250 Mark Crispin - - S: SEND FROM: - R: 250 OK - - S: RCPT TO: - R: 450 User not active now - - S: RSET - R: 250 OK - - S: MAIL FROM: - R: 250 OK - - S: RCPT TO: - R: 250 OK - - S: DATA - R: 354 Start mail input; end with . - S: Blah blah blah... - S: ...etc. etc. etc. - S: . - R: 250 OK - - S: QUIT - R: 221 SU-SCORE Service closing transmission channel - - Scenario 5 - - ------------------------------------------------------------- - - - - - - -[Page 50] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - Doing the preceding scenario more efficiently. - - ------------------------------------------------------------- - - R: 220 SU-SCORE Simple Mail Transfer Service Ready - S: HELO MIT-MC - R: 250 SU-SCORE - - S: VRFY Crispin - R: 250 Mark Crispin - - S: SOML FROM: - R: 250 OK - - S: RCPT TO: - R: 250 User not active now, so will do mail. - - S: DATA - R: 354 Start mail input; end with . - S: Blah blah blah... - S: ...etc. etc. etc. - S: . 
- R: 250 OK - - S: QUIT - R: 221 SU-SCORE Service closing transmission channel - - Scenario 6 - - ------------------------------------------------------------- - - - - - - - - - - - - - - - - - - - -Postel [Page 51] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - Mailing List Scenario - - First each of two mailing lists are expanded in separate sessions - with different hosts. Then the message is sent to everyone that - appeared on either list (but no duplicates) via a relay host. - - ------------------------------------------------------------- - - Step 1 -- Expanding the First List - - R: 220 MIT-AI Simple Mail Transfer Service Ready - S: HELO SU-SCORE - R: 250 MIT-AI - - S: EXPN Example-People - R: 250- - R: 250-Fred Fonebone - R: 250-Xenon Y. Zither - R: 250-Quincy Smith <@ISIF,Q-Smith@ISI-VAXA> - R: 250- - R: 250 - - S: QUIT - R: 221 MIT-AI Service closing transmission channel - - - - - - - - - - - - - - - - - - - - - - - - - -[Page 52] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - Step 2 -- Expanding the Second List - - R: 220 MIT-MC Simple Mail Transfer Service Ready - S: HELO SU-SCORE - R: 250 MIT-MC - - S: EXPN Interested-Parties - R: 250-Al Calico - R: 250- - R: 250-Quincy Smith <@ISIF,Q-Smith@ISI-VAXA> - R: 250- - R: 250 - - S: QUIT - R: 221 MIT-MC Service closing transmission channel - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Postel [Page 53] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - Step 3 -- Mailing to All via a Relay Host - - R: 220 USC-ISIE Simple Mail Transfer Service Ready - S: HELO SU-SCORE - R: 250 USC-ISIE - - S: MAIL FROM: - R: 250 OK - S: RCPT TO:<@ISIE,ABC@MIT-MC> - R: 250 OK - S: RCPT TO:<@ISIE,Fonebone@ISIQ> - R: 250 OK - S: RCPT TO:<@ISIE,XYZ@MIT-AI> - R: 250 OK - S: RCPT TO:<@ISIE,@ISIF,Q-Smith@ISI-VAXA> - R: 250 OK - S: RCPT TO:<@ISIE,joe@FOO-UNIX> - R: 250 OK - S: RCPT TO:<@ISIE,xyz@BAR-UNIX> - R: 250 OK - S: RCPT 
TO:<@ISIE,fred@BBN-UNIX> - R: 250 OK - - S: DATA - R: 354 Start mail input; end with . - S: Blah blah blah... - S: ...etc. etc. etc. - S: . - R: 250 OK - - S: QUIT - R: 221 USC-ISIE Service closing transmission channel - - Scenario 7 - - ------------------------------------------------------------- - - - - - - - - - - - - - -[Page 54] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - Forwarding Scenarios - - ------------------------------------------------------------- - - R: 220 USC-ISIF Simple Mail Transfer Service Ready - S: HELO LBL-UNIX - R: 250 USC-ISIF - - S: MAIL FROM: - R: 250 OK - - S: RCPT TO: - R: 251 User not local; will forward to - - S: DATA - R: 354 Start mail input; end with . - S: Blah blah blah... - S: ...etc. etc. etc. - S: . - R: 250 OK - - S: QUIT - R: 221 USC-ISIF Service closing transmission channel - - Scenario 8 - - ------------------------------------------------------------- - - - - - - - - - - - - - - - - - - - - - - -Postel [Page 55] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - ------------------------------------------------------------- - - Step 1 -- Trying the Mailbox at the First Host - - R: 220 USC-ISIF Simple Mail Transfer Service Ready - S: HELO LBL-UNIX - R: 250 USC-ISIF - - S: MAIL FROM: - R: 250 OK - - S: RCPT TO: - R: 251 User not local; will forward to - - S: RSET - R: 250 OK - - S: QUIT - R: 221 USC-ISIF Service closing transmission channel - - Step 2 -- Delivering the Mail at the Second Host - - R: 220 USC-ISIA Simple Mail Transfer Service Ready - S: HELO LBL-UNIX - R: 250 USC-ISIA - - S: MAIL FROM: - R: 250 OK - - S: RCPT TO: - R: OK - - S: DATA - R: 354 Start mail input; end with . - S: Blah blah blah... - S: ...etc. etc. etc. - S: . 
- R: 250 OK - - S: QUIT - R: 221 USC-ISIA Service closing transmission channel - - Scenario 9 - - ------------------------------------------------------------- - - - - -[Page 56] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - Too Many Recipients Scenario - - ------------------------------------------------------------- - - R: 220 BERKELEY Simple Mail Transfer Service Ready - S: HELO USC-ISIF - R: 250 BERKELEY - - S: MAIL FROM: - R: 250 OK - - S: RCPT TO: - R: 250 OK - - S: RCPT TO: - R: 552 Recipient storage full, try again in another transaction - - S: DATA - R: 354 Start mail input; end with . - S: Blah blah blah... - S: ...etc. etc. etc. - S: . - R: 250 OK - - S: MAIL FROM: - R: 250 OK - - S: RCPT TO: - R: 250 OK - - S: DATA - R: 354 Start mail input; end with . - S: Blah blah blah... - S: ...etc. etc. etc. - S: . - R: 250 OK - - S: QUIT - R: 221 BERKELEY Service closing transmission channel - - Scenario 10 - - ------------------------------------------------------------- - - - - - - -Postel [Page 57] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - -GLOSSARY - - ASCII - - American Standard Code for Information Interchange [1]. - - command - - A request for a mail service action sent by the sender-SMTP to the - receiver-SMTP. - - end of mail data indication - - A special sequence of characters that indicates the end of the - mail data. In particular, the five characters carriage return, - line feed, period, carriage return, line feed, in that order. - - host - - A computer in the internetwork environment on which mailboxes or - SMTP processes reside. - - line - - A line of text ending with a . - - mail data - - A sequence of ASCII characters of arbitrary length, which conforms - to the standard set in the Standard for the Format of ARPA Network - Text Messages (RFC 733 [2]). - - mailbox - - A character string (address) which identifies a user to whom mail - is to be sent. 
Mailbox normally consists of the host and user - specifications. The standard mailbox naming convention is defined - to be "user@host". Additionally, the "container" in which mail is - stored. - - receiver-SMTP process - - A process which transfers mail in cooperation with a sender-SMTP - process. It waits for a connection to be established via the - transport service. It receives SMTP commands from the - sender-SMTP, sends replies, and performs the specified operations. - - - -[Page 58] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - - reply - - A reply is an acknowledgment (positive or negative) sent from - receiver to sender via the transmission channel in response to a - SMTP command. The general form of a reply is a completion code - (including error codes) followed by a text string. The codes are - for use by programs and the text is usually intended for human - users. - - sender-SMTP process - - A process which transfers mail in cooperation with a receiver-SMTP - process. A local language may be used in the user interface - command/reply dialogue. The sender-SMTP initiates the transport - service connection. It initiates SMTP commands, receives replies, - and governs the transfer of mail. - - session - - The set of exchanges that occur while the transmission channel is - open. - - transaction - - The set of exchanges required for one message to be transmitted - for one or more recipients. - - transmission channel - - A full-duplex communication path between a sender-SMTP and a - receiver-SMTP for the exchange of commands, replies, and mail - text. - - transport service - - Any reliable stream-oriented data communication services. For - example, NCP, TCP, NITS. - - user - - A human being (or a process on behalf of a human being) wishing to - obtain mail transfer service. In addition, a recipient of - computer mail. 
- - - - - - -Postel [Page 59] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - word - - A sequence of printing characters. - - - - The characters carriage return and line feed (in that order). - - - - The space character. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -[Page 60] Postel - - - -RFC 788 November 1981 - Simple Mail Transfer Protocol - - - -REFERENCES - - [1] ASCII - - ASCII, "USA Code for Information Interchange", United States of - America Standards Institute, X3.4, 1968. Also in: Feinler, E. - and J. Postel, eds., "ARPANET Protocol Handbook", NIC 7104, for - the Defense Communications Agency by SRI International, Menlo - Park, California, Revised January 1978. - - [2] RFC 733 - - Crocker, D., J. Vittal, K. Pogran, and D. Henderson, "Standard for - the Format of ARPA Network Text Messages," RFC 733, NIC 41952, - November 1977. Also in: Feinler, E. and J. Postel, eds., - "ARPANET Protocol Handbook", NIC 7104, for the Defense - Communications Agency by SRI International, Menlo Park, - California, Revised January 1978. - - [3] TCP - - Postel, J., ed., "Transmission Control Protocol - DARPA Internet - Program Protocol Specification", RFC 793, USC/Information Sciences - Institute, September 1981. - - [4] NCP - - McKenzie,A., "Host/Host Protocol for the ARPA Network", NIC 8246, - January 1972. Also in: Feinler, E. and J. Postel, eds., "ARPANET - Protocol Handbook", NIC 7104, for the Defense Communications - Agency by SRI International, Menlo Park, California, Revised - January 1978. - - [5] Initial Connection Protocol - - Postel, J., "Official Initial Connection Protocol", NIC 7101, - 11 June 1971. Also in: Feinler, E. and J. Postel, eds., "ARPANET - Protocol Handbook", NIC 7104, for the Defense Communications - Agency by SRI International, Menlo Park, California, Revised - January 1978. 
- - [6] NITS - - PSS/SG3, "A Network Independent Transport Service", Study Group 3, - The Post Office PSS Users Group, February 1980. Available from - the DCPU, National Physical Laboratory, Teddington, UK. - - - -Postel [Page 61] - - - -November 1981 RFC 788 -Simple Mail Transfer Protocol - - - - [7] X.25 - - CCITT, "Recommendation X.25 - Interface Between Data Terminal - Equipment (DTE) and Data Circuit-terminating Equipment (DCE) for - Terminals Operating in the Packet Mode on Public Data Networks," - CCITT Orange Book, Vol. VIII.2, International Telephone and - Telegraph Consultative Committee, Geneva, 1976. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -[Page 62] Postel - diff --git a/htmlparser/tests/html/br001.html b/htmlparser/tests/html/br001.html deleted file mode 100644 index 5fb691d42793..000000000000 --- a/htmlparser/tests/html/br001.html +++ /dev/null @@ -1,11 +0,0 @@ - - - - - There no such thing as br end tag, it should be ignored. -

    - the output should be a single br tag -
    no line break here. -
    This is Different than Nav 4.0, but we decided to do this. - - diff --git a/htmlparser/tests/html/bug10049.html b/htmlparser/tests/html/bug10049.html deleted file mode 100644 index f79a6a3c7900..000000000000 --- a/htmlparser/tests/html/bug10049.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - -bug10049 - -
    -Next...

    - - -SOCKS proxy: -
    text in blockquote
    -
    - - \ No newline at end of file diff --git a/htmlparser/tests/html/bug10324.html b/htmlparser/tests/html/bug10324.html deleted file mode 100644 index f341607307b9..000000000000 --- a/htmlparser/tests/html/bug10324.html +++ /dev/null @@ -1,11 +0,0 @@ -bug10324 - - -Next...

    - - -
    - center small -
    - small -
    \ No newline at end of file diff --git a/htmlparser/tests/html/bug11381.html b/htmlparser/tests/html/bug11381.html deleted file mode 100644 index 78ba7b803060..000000000000 --- a/htmlparser/tests/html/bug11381.html +++ /dev/null @@ -1,47 +0,0 @@ - - - - - -bug11381 - - -Next...

    - - -

    <TABLE><TR><TD><FONT COLOR="red"><FORM>...

    - - - -
    - - - - -
    - HELLO -
    -
    - - - -
    - -
    - -
    - - - -

    Exactly the same but without the FONT tag.

    - - - - - - - -
    - -
    - -
    - - - - - - - diff --git a/htmlparser/tests/html/bug12118.html b/htmlparser/tests/html/bug12118.html deleted file mode 100644 index dd2bc5df2297..000000000000 --- a/htmlparser/tests/html/bug12118.html +++ /dev/null @@ -1,14 +0,0 @@ -bug12118 - - -Next...

    - - - - -

    - -Full 30 days unconditional money back guarantee. - -

    - diff --git a/htmlparser/tests/html/bug12269.html b/htmlparser/tests/html/bug12269.html deleted file mode 100644 index 51fb607cef77..000000000000 --- a/htmlparser/tests/html/bug12269.html +++ /dev/null @@ -1,38 +0,0 @@ - - - - - - - - - - - - - - - - -
    - - Stuart Parmenter  - - - cell1 -
    cell2cell3cell4
    - - - diff --git a/htmlparser/tests/html/bug12468.html b/htmlparser/tests/html/bug12468.html deleted file mode 100644 index 17f2a81a5d96..000000000000 --- a/htmlparser/tests/html/bug12468.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - -bugbug12468 - - -Next...

    - -

    <div>Swing 1.1.1 goes primetime</div>
    - -
    <div>Swing 1.1.1 goes primetime</div>
    - -
    <font><div>Swing 1.1.1 goes primetime</div></font>
    - -
    <font><div>Swing 1.1.1 goes primetime</div></font>
    - - - diff --git a/htmlparser/tests/html/bug1259.html b/htmlparser/tests/html/bug1259.html deleted file mode 100644 index 64274df97136..000000000000 --- a/htmlparser/tests/html/bug1259.html +++ /dev/null @@ -1,17 +0,0 @@ - - - - -bug1259 -Next...

    - - - -
      -
    • - If this text is 16pt, red, arial - then there is a style LEAK IN. -
    • -
    - - \ No newline at end of file diff --git a/htmlparser/tests/html/bug12632.html b/htmlparser/tests/html/bug12632.html deleted file mode 100644 index b9e2cb7fc969..000000000000 --- a/htmlparser/tests/html/bug12632.html +++ /dev/null @@ -1,27 +0,0 @@ - - -bug12632 - - -Next...

    - -
    - This should be small, but center nested inside font makes it normal -
    -
    - -
    - This should be red, but div nested inside font makes it black -
    -
    - -
    - Now the font is inside the center, so it is small -
    - -
    - Now the font is inside the div, so it is red -
    - - - \ No newline at end of file diff --git a/htmlparser/tests/html/bug13107.html b/htmlparser/tests/html/bug13107.html deleted file mode 100644 index 9e787e8f3ad6..000000000000 --- a/htmlparser/tests/html/bug13107.html +++ /dev/null @@ -1,11 +0,0 @@ -bug13107 - - -Next...

    - -

    - -Hello. -

    -Goodbye - \ No newline at end of file diff --git a/htmlparser/tests/html/bug14276.html b/htmlparser/tests/html/bug14276.html deleted file mode 100644 index f20d5f4ceed8..000000000000 --- a/htmlparser/tests/html/bug14276.html +++ /dev/null @@ -1,20 +0,0 @@ - - - -bug14276 - - -Next...

    - - -

    -

    - Rom I
    -

    - September
    -

    Wien

    - Juni ´99

    -
    - - - diff --git a/htmlparser/tests/html/bug14636.html b/htmlparser/tests/html/bug14636.html deleted file mode 100644 index 6380108a8b39..000000000000 --- a/htmlparser/tests/html/bug14636.html +++ /dev/null @@ -1,20 +0,0 @@ -Testcase for bug 14636 - - - - - -Next...

    - - - -

    - -<font face="Verdana,Arial,Helvetica"><p>The Shining is essential Kubrick...</p></font> - -

    - -
    - - - diff --git a/htmlparser/tests/html/bug14918.html b/htmlparser/tests/html/bug14918.html deleted file mode 100644 index dc83871d9380..000000000000 --- a/htmlparser/tests/html/bug14918.html +++ /dev/null @@ -1,7 +0,0 @@ -bug14918 - - -Next...

    - -

    The Cleaner 3 Beta Build 3023 Saturday, Sep. 25 \ No newline at end of file diff --git a/htmlparser/tests/html/bug18159.html b/htmlparser/tests/html/bug18159.html deleted file mode 100644 index b0a08925ec51..000000000000 --- a/htmlparser/tests/html/bug18159.html +++ /dev/null @@ -1,49 +0,0 @@ -Testcase for bug 18159 - - - - - -Next...

    - - - - - - - - - - -
    - -
    Sport Stories
    - -
    - - - - - - - - - - - - - -
    - -   Special Features - - - - Downloads   - -
    - - - - - diff --git a/htmlparser/tests/html/bug18185.html b/htmlparser/tests/html/bug18185.html deleted file mode 100644 index bc2f197bda61..000000000000 --- a/htmlparser/tests/html/bug18185.html +++ /dev/null @@ -1,7 +0,0 @@ -bug18185 - - -Next...

    - - -I am one and two,

    I am i and 5+3i; \ No newline at end of file diff --git a/htmlparser/tests/html/bug18403.html b/htmlparser/tests/html/bug18403.html deleted file mode 100644 index 27579e663060..000000000000 --- a/htmlparser/tests/html/bug18403.html +++ /dev/null @@ -1,17 +0,0 @@ - - - -bugbug18403 - - -Next...

    - - -(1)Some green text -


    -(2)Some red text, which can't be highlighted by the mouse

    -(3)Some text that should be red, but isn't

    -
    - - - diff --git a/htmlparser/tests/html/bug18865.html b/htmlparser/tests/html/bug18865.html deleted file mode 100644 index 0b1209a8f45d..000000000000 --- a/htmlparser/tests/html/bug18865.html +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - -bug18865 - - -Next...

    - - - -This text is displayed in the assigned font color (black) -
    - This text is displayed in white rather than font color assigned. This text is not using the assigned font color but rather - the text color assigned in the BODY element. -
    - -
    - - - diff --git a/htmlparser/tests/html/bug19172.html b/htmlparser/tests/html/bug19172.html deleted file mode 100644 index e9f6d580d4c9..000000000000 --- a/htmlparser/tests/html/bug19172.html +++ /dev/null @@ -1,27 +0,0 @@ - - - - -bug19172 - - -Next...

    - -Linux News sites: -

    -

      -
    1. LWN -
    2. LinuxWorld -
    3. Wired -
    4. Resources -
    5. Gazette -
    -

    -Games: -

    -

  • LinuxGames -
  • Game Tome News -

    - - - diff --git a/htmlparser/tests/html/bug19194.html b/htmlparser/tests/html/bug19194.html deleted file mode 100644 index 212f32c14241..000000000000 --- a/htmlparser/tests/html/bug19194.html +++ /dev/null @@ -1,8 +0,0 @@ -bug19194 - - -Next...

    - - -

      ...
    - \ No newline at end of file diff --git a/htmlparser/tests/html/bug20030.html b/htmlparser/tests/html/bug20030.html deleted file mode 100644 index 35fce983dd38..000000000000 --- a/htmlparser/tests/html/bug20030.html +++ /dev/null @@ -1,6 +0,0 @@ -bug20030 - - -Next...

    - -Should be same size as this. \ No newline at end of file diff --git a/htmlparser/tests/html/bug20178.html b/htmlparser/tests/html/bug20178.html deleted file mode 100644 index a4669af3280b..000000000000 --- a/htmlparser/tests/html/bug20178.html +++ /dev/null @@ -1,13 +0,0 @@ -bug20178 - - -Next...

    - - -

    Link to bad.html

    - - - - - -
    Should be link to good.html
    \ No newline at end of file diff --git a/htmlparser/tests/html/bug20199.html b/htmlparser/tests/html/bug20199.html deleted file mode 100644 index 1bf06976206d..000000000000 --- a/htmlparser/tests/html/bug20199.html +++ /dev/null @@ -1,23 +0,0 @@ - - -bug20199 - - -Next...

    - -

    This HTML demonstrates a bug in the HTML parsing of the anchor (A) tag. The first link (to bugzilla.mozilla.org) -contains a paragraph(P) tag inside the anchor(A) tag. This messes up the entire document.

    - -

    Link to bugzilla.mozilla.org
    -Link to mozilla.org
    -This should not be a link!!!
    -This should not be a link!!!
    -This should not be a link!!!
    -

    -

    -Neither should this!!
    -Neither should this!!
    -Neither should this!!
    -

    - - diff --git a/htmlparser/tests/html/bug21186.html b/htmlparser/tests/html/bug21186.html deleted file mode 100644 index 03f1d2d2a80c..000000000000 --- a/htmlparser/tests/html/bug21186.html +++ /dev/null @@ -1,13 +0,0 @@ - - - -bug21186 - - -Next...

    - -Bad Link in a span -
    -Good Link without a span - - diff --git a/htmlparser/tests/html/bug21318.html b/htmlparser/tests/html/bug21318.html deleted file mode 100644 index cd0800f14791..000000000000 --- a/htmlparser/tests/html/bug21318.html +++ /dev/null @@ -1,10 +0,0 @@ -bug21318 - - -Next...

    - -

    Birdie Num Num praises MAGNOLIA

    - -the

    block is causing the link not to be displayed. I believe that having -the

    block within the hyperlink is illegal HTML. The link does work if the -

    is outside the block. \ No newline at end of file diff --git a/htmlparser/tests/html/bug21424.html b/htmlparser/tests/html/bug21424.html deleted file mode 100644 index d4b0deb8d631..000000000000 --- a/htmlparser/tests/html/bug21424.html +++ /dev/null @@ -1,14 +0,0 @@ -bug21424 - - -Next...

    - - - - Enterprise Computing
     E-commerce
     Communications
     The Net
     Personal Technology
     Services & Consulting
     Year 2000
     CNET Investor New!
     CNET News.com TV
     CNET -Radio
     Perspectives
     Newsmakers
     Rumor Mill
     One Week View
     FREE Newsletter

    - -
    A D V E R T I S E M E N T
    Get a New Job!
    at CNET's Career Center
    Search for a job now
    Email me job listings
    Our featured employer


    - - - diff --git a/htmlparser/tests/html/bug21689.html b/htmlparser/tests/html/bug21689.html deleted file mode 100644 index 3deafb36dd1c..000000000000 --- a/htmlparser/tests/html/bug21689.html +++ /dev/null @@ -1,13 +0,0 @@ - - - -bug21689 - - -Next...

    - - -

    test

    - -

    foo \ No newline at end of file diff --git a/htmlparser/tests/html/bug21692.html b/htmlparser/tests/html/bug21692.html deleted file mode 100644 index 8b9692c158f0..000000000000 --- a/htmlparser/tests/html/bug21692.html +++ /dev/null @@ -1,38 +0,0 @@ - - -bug21692 - - -Next...

    - - - - - - - - - - -the code is like this:
    - -style-stuff:
    - -<style type="text/css"> -.rubrikval { font-size: 20pt; } -</style> -

    - -html-stuff:
    - -<a href="anmalan.html"><span class="rubrikval">On-line formulär</span></a>
    -<a href="anmalan.html">On-line formulär</a> - -

    - -On-line formulär -On-line formulär - - diff --git a/htmlparser/tests/html/bug21779.html b/htmlparser/tests/html/bug21779.html deleted file mode 100644 index 7bb5909bb81f..000000000000 --- a/htmlparser/tests/html/bug21779.html +++ /dev/null @@ -1,13 +0,0 @@ -bug21779 - - -Next...

    - - - - - - -
    C11 -
    C21 -
    \ No newline at end of file diff --git a/htmlparser/tests/html/bug22025.html b/htmlparser/tests/html/bug22025.html deleted file mode 100644 index 252e65db007f..000000000000 --- a/htmlparser/tests/html/bug22025.html +++ /dev/null @@ -1,7 +0,0 @@ -bug22025 - - -Next...

    - -

    Mozilla

    -

    Mozilla

    diff --git a/htmlparser/tests/html/bug22142.html b/htmlparser/tests/html/bug22142.html deleted file mode 100644 index 872c57cf0b2c..000000000000 --- a/htmlparser/tests/html/bug22142.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - -bug22142 - - -Next...

    - -BeforeThis should be clickableAfter - -text after - - diff --git a/htmlparser/tests/html/bug22157.html b/htmlparser/tests/html/bug22157.html deleted file mode 100644 index eeadd4d4f922..000000000000 --- a/htmlparser/tests/html/bug22157.html +++ /dev/null @@ -1,66 +0,0 @@ - - - - - - - - - - -bug22157 - - -Next...

    - -Business
    - -Entertainment - - - - \ No newline at end of file diff --git a/htmlparser/tests/html/bug23529.html b/htmlparser/tests/html/bug23529.html deleted file mode 100644 index 8617cf3f1ee4..000000000000 --- a/htmlparser/tests/html/bug23529.html +++ /dev/null @@ -1,11 +0,0 @@ -bug23529 - - -Next...

    - - - - Public Records - -
    - Find Anyone, Background Checks, SS# Checks, Adoption Reunions
    .. \ No newline at end of file diff --git a/htmlparser/tests/html/bug23680.html b/htmlparser/tests/html/bug23680.html deleted file mode 100644 index 6274897ab636..000000000000 --- a/htmlparser/tests/html/bug23680.html +++ /dev/null @@ -1,13 +0,0 @@ -bug23680 - - -Next...

    - - - - -
    /*  
    -   
    /* 
    - 
     * The contents of this file are subject to the Netscape Public   
    -   
     * The contents of this file are subject to the Netscape Public 
    -
    diff --git a/htmlparser/tests/html/bug23780.html b/htmlparser/tests/html/bug23780.html deleted file mode 100644 index d43aefcf4fdf..000000000000 --- a/htmlparser/tests/html/bug23780.html +++ /dev/null @@ -1,22 +0,0 @@ - -Bug23780 - - - - -Next...

    - - -

    This is bad html because this Anchor is missing a -end A element.

    - However this new link contains a start and end anchor -

    This text will use the href of the first anchor that wasn't closed.

    - - - - - - - - - diff --git a/htmlparser/tests/html/bug23831.html b/htmlparser/tests/html/bug23831.html deleted file mode 100644 index d22536c0a339..000000000000 --- a/htmlparser/tests/html/bug23831.html +++ /dev/null @@ -1,5 +0,0 @@ -bug23831 -Next...

    - - -

    Link -- after link

    \ No newline at end of file diff --git a/htmlparser/tests/html/bug24003.html b/htmlparser/tests/html/bug24003.html deleted file mode 100644 index 9924f961da9b..000000000000 --- a/htmlparser/tests/html/bug24003.html +++ /dev/null @@ -1,12 +0,0 @@ -bug24003 -Next...

    - - - - - -
    - - text here - -
    diff --git a/htmlparser/tests/html/bug2419.html b/htmlparser/tests/html/bug2419.html deleted file mode 100644 index c6fcb88464b5..000000000000 --- a/htmlparser/tests/html/bug2419.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - -Bug2419 - - -Next...

    - - - - 100% - - red text  text - - \ No newline at end of file diff --git a/htmlparser/tests/html/bug2447.html b/htmlparser/tests/html/bug2447.html deleted file mode 100644 index f58e6f34cea6..000000000000 --- a/htmlparser/tests/html/bug2447.html +++ /dev/null @@ -1,14 +0,0 @@ - - - -bug2447 - - -Next...

    - - - -

    T - -he

    - \ No newline at end of file diff --git a/htmlparser/tests/html/bug3073.html b/htmlparser/tests/html/bug3073.html deleted file mode 100644 index b4bf39c93979..000000000000 --- a/htmlparser/tests/html/bug3073.html +++ /dev/null @@ -1,23 +0,0 @@ - - - -bug3073 - - -Next...

    - - - Homepage Help -  |   - - - Privacy Policy -
    - - - - Flights -  |   - - - diff --git a/htmlparser/tests/html/bug466.html b/htmlparser/tests/html/bug466.html deleted file mode 100644 index 5fe41ff5310d..000000000000 --- a/htmlparser/tests/html/bug466.html +++ /dev/null @@ -1,9 +0,0 @@ - -bug466 - - -Next...

    - - bold b-i b normal - - \ No newline at end of file diff --git a/htmlparser/tests/html/bug4809.html b/htmlparser/tests/html/bug4809.html deleted file mode 100644 index 8efb3f2d8feb..000000000000 --- a/htmlparser/tests/html/bug4809.html +++ /dev/null @@ -1,19 +0,0 @@ - - -bug4809 - - -Next...

    - - - - - -
    - alt text -
    - - - - - diff --git a/htmlparser/tests/html/bug4814.html b/htmlparser/tests/html/bug4814.html deleted file mode 100644 index bbace1e48aab..000000000000 --- a/htmlparser/tests/html/bug4814.html +++ /dev/null @@ -1,16 +0,0 @@ - - - -bug4814 - - -Next...

    - - -One text -

    -Two text -
    -
    - - \ No newline at end of file diff --git a/htmlparser/tests/html/bug4825.html b/htmlparser/tests/html/bug4825.html deleted file mode 100644 index 1aef91ad144b..000000000000 --- a/htmlparser/tests/html/bug4825.html +++ /dev/null @@ -1,40 +0,0 @@ - - -stuttgart - - - - - - - - - - - - - - - - - - -
    text 1
    text 2 -
    text 3 - text 4 - - - - - -
    text 5

    -
    text 6 -
    -
    text 7
    -
    text 8
    -
    -
    text 9 -
    - - \ No newline at end of file diff --git a/htmlparser/tests/html/bug4956.html b/htmlparser/tests/html/bug4956.html deleted file mode 100644 index 1e35ce436d0d..000000000000 --- a/htmlparser/tests/html/bug4956.html +++ /dev/null @@ -1,46 +0,0 @@ - - - - -bug4956 - - -Next...

    - - - - -

    - -
    - -

    - - - - - -
    - - - -
    - -BRIAN MCGRORY

    - - Mooney's grass is always greener ... As the Sox return Tuesday for ... - - - - - diff --git a/htmlparser/tests/html/bug5859.html b/htmlparser/tests/html/bug5859.html deleted file mode 100644 index d4f8d5440a0d..000000000000 --- a/htmlparser/tests/html/bug5859.html +++ /dev/null @@ -1,36 +0,0 @@ - - - - -bug5859 - - -Next...

    - - -

    - -Floating Div, floating left. - -
    - - - - - - - -

    Paragraph

    - - - - - - - - - - - - - diff --git a/htmlparser/tests/html/bug6233.html b/htmlparser/tests/html/bug6233.html deleted file mode 100644 index 3d0a8aede864..000000000000 --- a/htmlparser/tests/html/bug6233.html +++ /dev/null @@ -1,18 +0,0 @@ - - -bug6233 - - -Next...

    - - -

    Before Inside After

    -
    -

    Hello

    - -
    -

    Hello

    - - - - diff --git a/htmlparser/tests/html/bug6925.html b/htmlparser/tests/html/bug6925.html deleted file mode 100644 index f90f27a083eb..000000000000 --- a/htmlparser/tests/html/bug6925.html +++ /dev/null @@ -1,20 +0,0 @@ - - -bug6925 - - -Next...

    - -hello - - - - -
    - -
    - inside - -
    -
    - \ No newline at end of file diff --git a/htmlparser/tests/html/bug7447.html b/htmlparser/tests/html/bug7447.html deleted file mode 100644 index 00941d2f8c37..000000000000 --- a/htmlparser/tests/html/bug7447.html +++ /dev/null @@ -1,29 +0,0 @@ - - - -bug7447 - - -Next...

    - - - - - - -
    -

    - - Some Text - -

    - - - - -
    - Nav4.x and IE align this text to the right -
    -
    - - \ No newline at end of file diff --git a/htmlparser/tests/html/bug7723.html b/htmlparser/tests/html/bug7723.html deleted file mode 100644 index 073fdb5e99f5..000000000000 --- a/htmlparser/tests/html/bug7723.html +++ /dev/null @@ -1,20 +0,0 @@ - - - - -bug7723 - - -Next...

    - - -body color - -font color -
    -

    -bold text -

    -
    - - \ No newline at end of file diff --git a/htmlparser/tests/html/bug7724.html b/htmlparser/tests/html/bug7724.html deleted file mode 100644 index c3a5dc2caa09..000000000000 --- a/htmlparser/tests/html/bug7724.html +++ /dev/null @@ -1,12 +0,0 @@ - - - - -bug7724 - - -Next...

    - - -

    Hellothere - \ No newline at end of file diff --git a/htmlparser/tests/html/bug7823.html b/htmlparser/tests/html/bug7823.html deleted file mode 100644 index 13e151854de9..000000000000 --- a/htmlparser/tests/html/bug7823.html +++ /dev/null @@ -1,11 +0,0 @@ - - - -bug7823 - - -Next...

    - -

  • bold normal - - \ No newline at end of file diff --git a/htmlparser/tests/html/bug7889.html b/htmlparser/tests/html/bug7889.html deleted file mode 100644 index dd8e94fdc2e1..000000000000 --- a/htmlparser/tests/html/bug7889.html +++ /dev/null @@ -1,20 +0,0 @@ - - - -bug7889 - - -Next...

    - - - - -
    - -
    The entry to the Leaf dressing room. -

    -
    -The dressing room with stalls made of Canadian Maple. -

    - - \ No newline at end of file diff --git a/htmlparser/tests/html/bug8056.html b/htmlparser/tests/html/bug8056.html deleted file mode 100644 index b78fa2dee25d..000000000000 --- a/htmlparser/tests/html/bug8056.html +++ /dev/null @@ -1,6 +0,0 @@ -bug8056 - - -Next...

    - -

    Hello

    \ No newline at end of file diff --git a/htmlparser/tests/html/bug8080.html b/htmlparser/tests/html/bug8080.html deleted file mode 100644 index d3e34f463b32..000000000000 --- a/htmlparser/tests/html/bug8080.html +++ /dev/null @@ -1,422 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - -bug8080 - - -Next...

    - - - - - - - - - - - - - - - - - - - -
    - -ABC Australia - Online News[space]Indonesia election '99
     
    - -KFOR troops shoot Serb police officer
    - -
    - - - -Mon, 14 Jun 1999
    [space] - -
    - -Latest Bulletin
    - -email news delivery
    - -Breaking Stories
    - - - -Kosovo Crisis
    - - - -Indonesia
    - - - -World News from Radio Australia
    - -State News
    - -Business News
    - -Sport News
    - -Olympics news
    - -Tax Debate news special
    - -Science and Technology news
    - -Rural News
    - -Features
    - -Weather
    - -[space]
    - -Regional News Service
    [space]
    - - - -[space]
    - -ABC News Guide
    - -About This Site
    - -Search
    - - - - - - - - - -
    - - - - - -British officials say peacekeeping troops in Kosovo have shot dead a man believed to be an off-duty Serb police officer who opened fire at them in the provincial capital, Pristina.
    - - - -
    - - - -The incident was the first reported armed violence between the NATO-led peace force (KFOR) and Serbs since it began entering the southern province of Yugoslavia.
    - - - -
    - - - -British officers, in a paratroop force fanning out through eastern Pristina to secure it while mechanised infantry were taking control of the west, said the assailant was warned several times to put down his gun but instead began shooting.
    - - - -
    - - - -"This guy came up shouting, with a pistol in his hand," one officer told the Reuters news agency.
    - - - -
    - - - -
    ABC VIDEO
    kosovo
    NATO moves into Kosovo as the Yugoslav military and Serb civilians move out. Katy Cronin reports.

    kosovo
    NATO makes slow progress as soldiers begin their sweep of the province. Eric Campbell reports.

    kosovo
    Kosovar refugees in Australia are making cautious preparations to return home. Prue Clarke reports.

    Requires RealPlayer


    "He fired a shot at our guys and they fired back. He's dead."
    - - - -
    - - - -The unidentified man was not in uniform but was believed to be an off-duty officer of the paramilitary Interior Ministry (MUP) police, paratroopers told Reuters at the scene in the courtyard of an apartment complex.
    - - - -
    - - - -Serbs have greeted KFOR's arrival with sullen hostility for the most part, while ethnic Albanians who suffered under Serbian police repression have been celebrating.
    - - - -
    - - - -It seems the shooting had an odd beginning.
    - - - -
    - - - -British Captain Andrew Reeds says eight paratroopers were conversing and having their photographs taken with two MUP officers when the assailant appeared from the direction of a bar in the courtyard frequented by Serbs.
    - - - -
    - - - -The man, possibly objecting to apparent fraternisation between the Serb police officers and British troops, was brandishing a semi-automatic pistol and the paratroopers warned him six times to stop and drop the weapon.
    - - - -
    - - - -But he ignored them and fired from a distance of about 25 metres. Paratroopers returned fire and killed him.
    - - - -
    - - - -MUP personnel must withdraw along with Yugoslav army forces by June 20 by agreement with NATO to clear the way for the return of around 1 million ethnic Albanian refugees.
    - - - -
    - - - -
    - - - -More troops
    - - - -
    - - - -Some 1,600 British troops have moved into the capital of Kosovo, Pristina, as ethnic Albanian rebels killed up to four Serb security men.
    - - - -
    - - - -British Army Lieutenant Colonel Nick Clissitt told the AFP news agency: "We're coming into town. We're stabilising it."
    - - - -
    - - - -He says 800 paratroopers with the 5th Airborne Brigade landed to the east of Kosovo by helicopter before moving through the capital while another 800 Irish guards were in the centre but heading west.
    - - - -
    - - - -Lieutenant Colonel Clissitt added that the 1,600 troops were moving as scheduled under the deployment of an international NATO peacekeeping force that pushed into Kosovo on Saturday from Macedonia.
    - - - -
    - - - -Entering Kosovo on Saturday were about 6,000 British troops and 1,500 French troops with the NATO-led Kosovo force (KFOR).
    - - - -
    - - - -US, German and Italian troops were also due to enter the southern Serb province.
    - - - -
    - - - -In a move that caught NATO off guard, around 300 Russian troops arrived first in Kosovo on Friday, setting up base at the Pristina airport and pre-empting British and French troops slated to lead NATO's deployment in Kosovo.
    - - - -
    - - - -Lieutenant Colonel Clissitt said two Yugoslav soldiers and one Serb policeman were killed in clashes with rebels in areas of Pristina, but said the NATO force was not involved in defusing the incidents.
    - - - -
    - - - -Referring to the same incidents, Serb sources said two policemen and two Yugoslav soldiers were killed by Kosovo Liberation Army fighters.
    - - - -
    - - - -Asked if British troops were disarming Kosovo Liberation Army rebels, Lieutenant Colonel Clissitt replied: "We will not tolerate any armed group using its muscle.
    - - - -
    - - - -"We will respond robustly."
    - - - -
    - - - -NATO troops would ensure the terms of last week's agreement ending the NATO air war are enforced, he said.
    - - - -
    - - - -On Saturday, British paratroopers who landed at a village south of Pristina were greeted by a group of KLA who refused to disarm and headed to the mountains.
    - - - -
    - - - -More news and video and audio archives can be found on the Kosovo page. - -
    - -

    - - - - - -


    - -
    - -Previous Story - -Return to main bulletin page - -Next Story - - - -

    - -

    - -ABC Online

    - -© 1999 Australian Broadcasting Corporation

    - -Updated: Mon, 14 Jun 1999 at 7:24 AM (AEST) - -
    - -AEST = Australian Eastern Standard Time which is 10 hours ahead of UTC (Greenwich Mean Time) - -

    - -
    - - - - - - - - - - - - - - - - - diff --git a/htmlparser/tests/html/bug8681.html b/htmlparser/tests/html/bug8681.html deleted file mode 100644 index 194c8d36c0e1..000000000000 --- a/htmlparser/tests/html/bug8681.html +++ /dev/null @@ -1,8 +0,0 @@ -bug8681 - - -Next...

    - -this is arial -
    this is pre
    -
    \ No newline at end of file diff --git a/htmlparser/tests/html/bug8738.html b/htmlparser/tests/html/bug8738.html deleted file mode 100644 index 3df7b62e16b3..000000000000 --- a/htmlparser/tests/html/bug8738.html +++ /dev/null @@ -1,28 +0,0 @@ - - -bug8738 - - -Next...

    - - - - - - - - - - -
    - -
    - - - -

    - - Hippie Amp Beta Test - -

    - diff --git a/htmlparser/tests/html/bug8771.html b/htmlparser/tests/html/bug8771.html deleted file mode 100644 index d1b098f2f740..000000000000 --- a/htmlparser/tests/html/bug8771.html +++ /dev/null @@ -1,21 +0,0 @@ - - - -bug8771 - - -Next...

    - - - - - -
    - -
    June 27, 1999
    -
    -
    - - \ No newline at end of file diff --git a/htmlparser/tests/html/bug8913.html b/htmlparser/tests/html/bug8913.html deleted file mode 100644 index 8283cce9414c..000000000000 --- a/htmlparser/tests/html/bug8913.html +++ /dev/null @@ -1,112 +0,0 @@ - - - - - -bug8913 - - -Next...

    - - - - - - - - -before list - -
    • inside list
    - -after list - -
    - - - -

    - - - - - -before div - -
    inside div
    - -after div - -
    - - - -

    - - - - - -before table - -
    inside table
    - -after table - -
    - - - -

    - - - - - -before div - -
    inside div
    - -after div - -
    - - - -

    - - - - - -before div - -
    inside div
    - -after div - -
    - - - -

    - - - - - -before span - -inside span - -after span - - - - - - - - - - \ No newline at end of file diff --git a/htmlparser/tests/html/bug8996.html b/htmlparser/tests/html/bug8996.html deleted file mode 100644 index f0137d9fab58..000000000000 --- a/htmlparser/tests/html/bug8996.html +++ /dev/null @@ -1,18 +0,0 @@ - - -bug8996 - - -Next...

    - -blah blah blah blah blah blah blah blah blah blah blah blah blah blah blah blah -blah blah blah blah blah blah blah
    - -
    -blah
    -
    -
    -blah blah blah blah blah blah blah blah blah blah blah blah blah blah blah blah -blah blah blah blah blah blah blah
    - -blah
    \ No newline at end of file diff --git a/htmlparser/tests/html/bug9536.html b/htmlparser/tests/html/bug9536.html deleted file mode 100644 index 5d55383e1ef2..000000000000 --- a/htmlparser/tests/html/bug9536.html +++ /dev/null @@ -1,38 +0,0 @@ - - - List parsing bug - - - The following two sections should look identical -
    - Section #1: -
      -
        -
          -
        1. This is an ol:ul:ol -
        2. This is an ol:ul:ol -
        -
      • This is an ol:ul -
      • This is an ol:ul -
      -
    1. This is an ol -
    2. This is an ol -
    - -
    - Section #2: -
      -
        -
          -
        1. This is an ol:ul:ol
        2. -
        3. This is an ol:ul:ol
        4. -
        -
      • This is an ol:ul
      • -
      • This is an ol:ul
      • -
      -
    1. This is an ol
    2. -
    3. This is an ol
    4. -
    - - - diff --git a/htmlparser/tests/html/bug9563.html b/htmlparser/tests/html/bug9563.html deleted file mode 100644 index 175cbadee361..000000000000 --- a/htmlparser/tests/html/bug9563.html +++ /dev/null @@ -1,8 +0,0 @@ -bug9563 - - -Next...

    - - -

    Archives - \ No newline at end of file diff --git a/htmlparser/tests/html/bug991.html b/htmlparser/tests/html/bug991.html deleted file mode 100644 index 93ad94c753bb..000000000000 --- a/htmlparser/tests/html/bug991.html +++ /dev/null @@ -1,9 +0,0 @@ - -bug991 -Next...

    - -For example, the following: -

    Hello

    -...makes the paragraph italics. However, the following: -

    Hello

    -...does not make the paragraph red. This is inconsistent behaviour. \ No newline at end of file diff --git a/htmlparser/tests/html/button001.html b/htmlparser/tests/html/button001.html deleted file mode 100644 index 8de831c4681b..000000000000 --- a/htmlparser/tests/html/button001.html +++ /dev/null @@ -1,19 +0,0 @@ - - -
    -

    - First name:
    - Last name:
    - email:
    - Male
    - Female
    - - - -

    - - \ No newline at end of file diff --git a/htmlparser/tests/html/button002.html b/htmlparser/tests/html/button002.html deleted file mode 100644 index 04dda10f1e6a..000000000000 --- a/htmlparser/tests/html/button002.html +++ /dev/null @@ -1,34 +0,0 @@ - - - - test in front -
    -

    - First name:
    - Last name:
    - email:
    - Male
    - Female
    - - - text before reset - -

    - text at the end. -
    - - \ No newline at end of file diff --git a/htmlparser/tests/html/center001.html b/htmlparser/tests/html/center001.html deleted file mode 100644 index fe690af2f27a..000000000000 --- a/htmlparser/tests/html/center001.html +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - - -

    Text in H2 and center

    -after H2 tag. -

    text in p tag. -

    Text in center, start new line
    -text after center. -
    another line in the same Paragraph/ - -

    text in p tag. Same as above, but added a P tag before Center tag. -

    -

    Text in center, start new line
    -text after center. -
    another line in the same Paragraph - - -

    Text in H4 -

    text in H4 and P -
    more text -

    - - - - diff --git a/htmlparser/tests/html/center002.html b/htmlparser/tests/html/center002.html deleted file mode 100644 index d74f32494402..000000000000 --- a/htmlparser/tests/html/center002.html +++ /dev/null @@ -1,36 +0,0 @@ - - -CENTER tag can have HR -
    - - - - - - -
    - Magellan is cool, yes? -
    -
    -
    -
    the same as above, but in P tag -

    -

    - - - - - - -
    - Magellan is cool, yes? -
    -
    -
    - text after closing CENTER, but still in P. -

    -some text after P is closed. -

    another paragraph. - - - diff --git a/htmlparser/tests/html/col001.html b/htmlparser/tests/html/col001.html deleted file mode 100644 index 36da55a2c0f9..000000000000 --- a/htmlparser/tests/html/col001.html +++ /dev/null @@ -1,21 +0,0 @@ - - -test COLGROUP - - - - - -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    - -last text - - \ No newline at end of file diff --git a/htmlparser/tests/html/col002.html b/htmlparser/tests/html/col002.html deleted file mode 100644 index 93db8c39baec..000000000000 --- a/htmlparser/tests/html/col002.html +++ /dev/null @@ -1,24 +0,0 @@ - - -test COL and COLGROUP - - - - - - - - -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    - -last text - - \ No newline at end of file diff --git a/htmlparser/tests/html/col003.html b/htmlparser/tests/html/col003.html deleted file mode 100644 index 1ba1beebfc8f..000000000000 --- a/htmlparser/tests/html/col003.html +++ /dev/null @@ -1,31 +0,0 @@ - - -test THEAD - - - - - - - - - - - -
    cell H-1 - cell H-2 - cell H-3 - cell H-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    - -last text - - \ No newline at end of file diff --git a/htmlparser/tests/html/col004.html b/htmlparser/tests/html/col004.html deleted file mode 100644 index cacd84d9150d..000000000000 --- a/htmlparser/tests/html/col004.html +++ /dev/null @@ -1,26 +0,0 @@ - - -test TFOOT - - - - - - -
    cell F-1 - cell F-2 - cell F-3 - cell F-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    - -last text - - \ No newline at end of file diff --git a/htmlparser/tests/html/col005.html b/htmlparser/tests/html/col005.html deleted file mode 100644 index 4806a80cd435..000000000000 --- a/htmlparser/tests/html/col005.html +++ /dev/null @@ -1,39 +0,0 @@ - - -test both FHEAD and TFOOT -
    In MS IE4.0, TFOOT can be before THEAD - - - - - - - - - - - - - - -
    cell F-1 - cell F-2 - cell F-3 - cell F-4 -
    cell h-1 - cell h-2 - cell h-3 - cell h-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    - -last text - - \ No newline at end of file diff --git a/htmlparser/tests/html/col006.html b/htmlparser/tests/html/col006.html deleted file mode 100644 index 2ae8b3fd42bc..000000000000 --- a/htmlparser/tests/html/col006.html +++ /dev/null @@ -1,261 +0,0 @@ - - -File COL006.html is a long table. -
    Tested with MS IE4.0 : THEAD and TFOOT are NOT specially treated when scrolled on screen, -or printed on paper. -
    Netscape can do better here - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    cell F-1 - cell F-2 - cell F-3 - cell F-4 -
    cell h-1 - cell h-2 - cell h-3 - cell h-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    - -last text - - \ No newline at end of file diff --git a/htmlparser/tests/html/col007.html b/htmlparser/tests/html/col007.html deleted file mode 100644 index 6588078740cc..000000000000 --- a/htmlparser/tests/html/col007.html +++ /dev/null @@ -1,39 +0,0 @@ - - -2 THEAD section in table -
    MS IE4.0 - - - - - - - - - - - - - - -
    cell H-1 - cell H-2 - cell H-3 - cell H-4 -
    cell H2-1 - cell H2-2 - cell H2-3 - cell H2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    - -last text - - \ No newline at end of file diff --git a/htmlparser/tests/html/col008.html b/htmlparser/tests/html/col008.html deleted file mode 100644 index 85ffba791bd9..000000000000 --- a/htmlparser/tests/html/col008.html +++ /dev/null @@ -1,35 +0,0 @@ - - -test 2 TFOOT. -
    MS IE4.0, only the first TFOOT is moved to the end of the table. -
    The second TFOOT tag is ignored. - - - - - - - - - -
    cell F-1 - cell F-2 - cell F-3 - cell F-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell F2-1 - cell F2-2 - cell F2-3 - cell F2-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    - -last text - - \ No newline at end of file diff --git a/htmlparser/tests/html/col009.html b/htmlparser/tests/html/col009.html deleted file mode 100644 index 94bcc3fda5f8..000000000000 --- a/htmlparser/tests/html/col009.html +++ /dev/null @@ -1,29 +0,0 @@ - - -TFOOT after TBODY, MS IE4.0 can take it. -
    but HTML4.0 standard says "TFOOT must appear before TBODY." - - - - - - - - -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell F-1 - cell F-2 - cell F-3 - cell F-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    - -last text - - \ No newline at end of file diff --git a/htmlparser/tests/html/col010.html b/htmlparser/tests/html/col010.html deleted file mode 100644 index c9143ed08e8a..000000000000 --- a/htmlparser/tests/html/col010.html +++ /dev/null @@ -1,22 +0,0 @@ - - -Test TBODY tag. - - - - - - -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    - -last text - - \ No newline at end of file diff --git a/htmlparser/tests/html/col011.html b/htmlparser/tests/html/col011.html deleted file mode 100644 index 2bc5b13f381d..000000000000 --- a/htmlparser/tests/html/col011.html +++ /dev/null @@ -1,32 +0,0 @@ - - -Test empty THEAD tag. MS IE4.0 gives small space for each empty tr tag. - - - - - - - - - - - - - - - - -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    - -last text - - \ No newline at end of file diff --git a/htmlparser/tests/html/col012.html b/htmlparser/tests/html/col012.html deleted file mode 100644 index e4ebd1b6e698..000000000000 --- a/htmlparser/tests/html/col012.html +++ /dev/null @@ -1,27 +0,0 @@ - - -Test THEAD after TBODY. MS IE4.0 can take it. -
    In HTML4.0 standard, THEAD must be before TBODY. - - - - - - -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell H-1 - cell H-2 - cell H-3 - cell H-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    - -last text - - \ No newline at end of file diff --git a/htmlparser/tests/html/col013.html b/htmlparser/tests/html/col013.html deleted file mode 100644 index a13d22a3b30e..000000000000 --- a/htmlparser/tests/html/col013.html +++ /dev/null @@ -1,22 +0,0 @@ - - -test COLGROUP in middle of TBODY -
    MS IE4.0 takes it! - - - - - -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    - -last text - - \ No newline at end of file diff --git a/htmlparser/tests/html/col014.html b/htmlparser/tests/html/col014.html deleted file mode 100644 index 5a23aef898a5..000000000000 --- a/htmlparser/tests/html/col014.html +++ /dev/null @@ -1,22 +0,0 @@ - - -test COLGROUP at the end of the table -
    MS IE4.0 ignores it. - - - - - -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    - -last text - - \ No newline at end of file diff --git a/htmlparser/tests/html/col015.html b/htmlparser/tests/html/col015.html deleted file mode 100644 index 8ba0640f5969..000000000000 --- a/htmlparser/tests/html/col015.html +++ /dev/null @@ -1,22 +0,0 @@ - - -test COLGROUP in midlle of a TR. -
    In MS IE4.0, COLGROUP takes effect and starts new TR. - - - - - -
    cell 1-1 - cell 1-2 -
    cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    - -last text - - \ No newline at end of file diff --git a/htmlparser/tests/html/col016.html b/htmlparser/tests/html/col016.html deleted file mode 100644 index b7f82db69e8f..000000000000 --- a/htmlparser/tests/html/col016.html +++ /dev/null @@ -1,31 +0,0 @@ - - -test TFOOT has 2 tr tags - - - - - - - -
    cell F-1 - cell F-2 - cell F-3 - cell F-4 -
    cell F2-1 - cell F2-2 - cell F2-3 - cell F2-4 -
    cell 1-1 - cell 1-2 - cell 1-3 - cell 1-4 -
    cell 2-1 - cell 2-2 - cell 2-3 - cell 2-4 -
    - -last text - - \ No newline at end of file diff --git a/htmlparser/tests/html/comments.html b/htmlparser/tests/html/comments.html deleted file mode 100644 index d0cd752e852e..000000000000 --- a/htmlparser/tests/html/comments.html +++ /dev/null @@ -1,310 +0,0 @@ - - - - - - - - - Evil Tests: Comments - - - - - -

    Comments

    - -

    If you have any comments to make regarding this test, e-mail py8ieh=eviltests@bath.ac.uk.

    - -
    -
    Prerequisites
    -
    Browsers that are subjected to this test should support the -SGML comments: <!-- -- -- -->.
    -
    - -

    1. The comment delimiter is --

    - -

    Note that there are four (4) sub-tests to this -section. Because of the nature of these tests, some may not appear. If -so, there is a bug. I repeat: there should be four separate tests in -section 1! Each consists of a bit of explanatory text, a purple box -with yellow text, and some explanatory markup.

    - -

    The comment delimiter is --, not <!-- -and -->. In the following sentence, the word -INCORRECTLY should not appear:

    - -

    This browser parses comments - INCORRECTLY! Yes, I said incorrectly.

    - - - -

    The markup used was:

    - -
    -     <P>This browser parses comments <!-- -- --> INCORRECTLY!
    -     Yes, I said in<!-- -- -->correctly.</P>
    -
    - -

    If you take -- to be the comment delimiter, you can -easily see that the words in the middle are actually inside a -comment.

    - -

    Here is another:

    - -

    This browser parses comments - INCORRECTLY! Yes, I said incorrectly.

    - -

    The markup used was:

    - -
    -     <P>This browser parses comments
    -     <!---- -->INCORRECTLY! Yes, I said in<!------>correctly.</P>
    -
    - -

    Note that it is a serious error if this is the paragraph -immediately after the test line! There should be some commentary -(including a snippet of the source of the last test) between this -paragraph and the previous test.

    - -

    Here is yet another:

    - -

    This browser parses comments - INCORRECTLY! Yes, I said incorrectly.

    - -

    The markup used was:

    - -
    -     <P>This browser parses comments
    -     <!------>INCORRECTLY! Yes, I said in<!------>correctly.</P>
    -
    - -

    Note that it is a serious error if this is the paragraph -immediately after the test line! There should be some commentary -(including a snippet of the source of the last test) between this -paragraph and the previous test.

    - -

    And finally, here is yet another. Hopefully that should cover all bases...

    - -

    This browser parses comments - INCORRECTLY! Yes, I said incorrectly.

    - -

    The markup used was:

    - -
    -     <P>This browser parses comments
    -     <!-- ---->INCORRECTLY! Yes, I said in<!------>correctly.</P>
    -
    - -

    Note that it is a serious error if this is the paragraph -immediately after the test line! There should be some commentary -(including a snippet of the source of the last test) between this -paragraph and the previous test.

    - -

    2. Comments should not mess up whitespace collapsing!

    - -

    In the following, the words should not be spaced out any more than normal.

    - -

    These words are not separated by comments. They - - - - - - - - - should - - - - - - - - - be - - - - - - - - spaced - - - - - - - - out - - - - - - - - as - - - - - - - - - much - - - - - - - - - as - - - - - - - - these - - - - - - - - words.

    - -

    3. Comment Terminator

    - - -

    The comment delimiter is --, so the end delimiter is -certainly not ->. In the following sentence, the word -INCORRECTLY should not appear:

    - -

    This browser parses comments correctly.

    - -

    The markup used was:

    - -
    -     <P>This browser parses comments <!-- -> INCORRECTLY!
    -     Yes, I said in<!- -->correctly.</P>
    -
    - -

    If you take -- to be the comment delimiter, you can -easily see that the words in the middle are actually inside a -comment.

    - - -

    Submit Results

    - -
    -

    How does your browser fare on this test? - - - -

    -
    - -
    -

    Up to the Evil Tests Page.

    -

    Bugzilla: Bug 2749 (parsing errors - reported fixed, awaiting strict DTD mode to verify) ; Bug 3304 (page does not display)

    -

    This page is maintained by Ian Hickson (py8ieh@bath.ac.uk).

    -

    Last updated in January 1999.

    - - - - diff --git a/htmlparser/tests/html/del001.html b/htmlparser/tests/html/del001.html deleted file mode 100644 index 6db987a8d791..000000000000 --- a/htmlparser/tests/html/del001.html +++ /dev/null @@ -1,14 +0,0 @@ - - -test DEL tag - - -test DEL
    -1normal text in body line 1. - -A This text is in DEL -
    B line 2 in DEL -
    -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/del002.html b/htmlparser/tests/html/del002.html deleted file mode 100644 index 3b5a19307661..000000000000 --- a/htmlparser/tests/html/del002.html +++ /dev/null @@ -1,17 +0,0 @@ - - -Nested DEL - - -nested DEL
    -1normal text in body line 1. - -A This text is in DEL - -
    B line 2 in DEL -
    -C line 3 in DEL -
    -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/del003.html b/htmlparser/tests/html/del003.html deleted file mode 100644 index aedf7363e332..000000000000 --- a/htmlparser/tests/html/del003.html +++ /dev/null @@ -1,12 +0,0 @@ - - - DEL strikeover text - - -DEL strikeover text -Normal text1 -
    text1 with strikeover
    deleted text
    text2 with strikeover
    -
    Normal text2 - - - diff --git a/htmlparser/tests/html/del004.html b/htmlparser/tests/html/del004.html deleted file mode 100644 index 76f2033882d1..000000000000 --- a/htmlparser/tests/html/del004.html +++ /dev/null @@ -1,13 +0,0 @@ - - -DEL an A tag - - -DEL an A tag, link still works
    -1normal text in body line 1. - -This A tag is deleted. - -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/del005.html b/htmlparser/tests/html/del005.html deleted file mode 100644 index b781b0199e8d..000000000000 --- a/htmlparser/tests/html/del005.html +++ /dev/null @@ -1,22 +0,0 @@ - - -Del a whole TABLE - - -Del a whole TABLE, no visual effect, strikeover on text in table
    -1normal text in body line 1. - -text in DEL - - - -
    This is the first cell in the table - This is the second cell in the table -
    This is the third cell in the table - This is the fourth cell in the table -
    -Text in DEL. -
    -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/del006.html b/htmlparser/tests/html/del006.html deleted file mode 100644 index 6ec943d11183..000000000000 --- a/htmlparser/tests/html/del006.html +++ /dev/null @@ -1,21 +0,0 @@ - - -DEL table cell TD - - -DEL table cell TD, no visual effect, no strikeover on deleted text.
    -1normal text in body line 1. - - - - -
    This is the first cell in the table - - This is the second cell in the table - -
    This is the third cell in the table - This is the fourth cell in the table -
    -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/del007.html b/htmlparser/tests/html/del007.html deleted file mode 100644 index bce5c32e2e89..000000000000 --- a/htmlparser/tests/html/del007.html +++ /dev/null @@ -1,21 +0,0 @@ - - -Del open TR - - -test DEL
    -1normal text in body line 1. - - - - - -
    This is the first cell in the table - - This is the second cell in the table -
    This is the third cell in the table - This is the fourth cell in the table -
    -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/del008.html b/htmlparser/tests/html/del008.html deleted file mode 100644 index a2cd7f3f9640..000000000000 --- a/htmlparser/tests/html/del008.html +++ /dev/null @@ -1,22 +0,0 @@ - - -Del open TD - - -test DEL
    -1normal text in body line 1. - - - - -
    This is the first cell in the table - - This is the - - second cell in the table -
    This is the third cell in the table - This is the fourth cell in the table -
    -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/del009.html b/htmlparser/tests/html/del009.html deleted file mode 100644 index 8c2d147dbc2b..000000000000 --- a/htmlparser/tests/html/del009.html +++ /dev/null @@ -1,19 +0,0 @@ - - -DEL inside table cell TD - - -DEL table cell TD, no visual effect, no strikeover on deleted text.
    -1normal text in body line 1. - - - - -
    This is the first cell in the table - This is deleted text -
    This is the third cell in the table - This is the fourth cell in the table -
    -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/del010.html b/htmlparser/tests/html/del010.html deleted file mode 100644 index 1cc70553c61c..000000000000 --- a/htmlparser/tests/html/del010.html +++ /dev/null @@ -1,16 +0,0 @@ - - -DEL a P tag - - -DEL a P tag
    -1normal text in body line 1. - - A This text is in DEL -

    B line 2 in DEL -

    - C line 3 in DEL -
    -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/del011.html b/htmlparser/tests/html/del011.html deleted file mode 100644 index 3f3a1b7607a9..000000000000 --- a/htmlparser/tests/html/del011.html +++ /dev/null @@ -1,23 +0,0 @@ - - -DEL cross scope with a table - - -DEL cross scope with a table
    -It starts before table, but ends inside table. -
    It does not affect the table, but the DEL is terminated after the table. -
    1normal text in body line 1. - -text in DEL. - - - - - -
    This is the first cell in the table - This cell 2
    This is the third cell in the table - This is the fourth cell in the table -
    -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/doc001.html b/htmlparser/tests/html/doc001.html deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/htmlparser/tests/html/doc002.html b/htmlparser/tests/html/doc002.html deleted file mode 100644 index 4a2ffa575dc1..000000000000 --- a/htmlparser/tests/html/doc002.html +++ /dev/null @@ -1,10 +0,0 @@ - - -Welcome to Netscape - - -Text in BODY. - -Text after end BODY tag, visible in Nav4.0; - -Text after end HTML tag, visible in Nav4.0; diff --git a/htmlparser/tests/html/endswithcr.html b/htmlparser/tests/html/endswithcr.html deleted file mode 100644 index c7768fddf335..000000000000 --- a/htmlparser/tests/html/endswithcr.html +++ /dev/null @@ -1 +0,0 @@ -This file ends with a CR \ No newline at end of file diff --git a/htmlparser/tests/html/entity001.html b/htmlparser/tests/html/entity001.html deleted file mode 100644 index 2fa4cb562794..000000000000 --- a/htmlparser/tests/html/entity001.html +++ /dev/null @@ -1,12 +0,0 @@ - - -entity in bold as ", >, and so on. -
    Another line of text. -
    text with entity > should work -Tags can have entities in their attribute values, and it is -common to have a amps at the end of a CGI link. -
    -text -
    last text - - \ No newline at end of file diff --git a/htmlparser/tests/html/entity_attrlist.html b/htmlparser/tests/html/entity_attrlist.html deleted file mode 100644 index 0d72d26b9bb7..000000000000 --- a/htmlparser/tests/html/entity_attrlist.html +++ /dev/null @@ -1,396 +0,0 @@ - nbsp ---- no-break space = non-breaking space
    - - iexcl ---- inverted exclamation mark
    - cent ---- cent sign
    - pound ---- pound sign
    - curren ---- currency sign
    - yen ---- yen sign = yuan sign
    - brvbar ---- broken bar = broken vertical bar
    - - sect ---- section sign
    - uml ---- diaeresis = spacing diaeresis
    - - copy ---- copyright sign
    - ordf ---- feminine ordinal indicator
    - laquo ---- left-pointing double angle quotation mark - = left pointing guillemet
    - not ---- not sign
    - shy ---- soft hyphen = discretionary hyphen
    - - reg ---- registered sign = registered trade mark sign
    - - macr ---- macron = spacing macron = overline - = APL overbar
    - deg ---- degree sign
    - plusmn ---- plus-minus sign = plus-or-minus sign
    - - sup2 ---- superscript two = superscript digit two - = squared
    - sup3 ---- superscript three = superscript digit three - = cubed
    - acute ---- acute accent = spacing acute
    - - micro ---- micro sign
    - para ---- pilcrow sign = paragraph sign
    - - middot ---- middle dot = Georgian comma - = Greek middle dot
    - cedil ---- cedilla = spacing cedilla
    - sup1 ---- superscript one = superscript digit one
    - - ordm ---- masculine ordinal indicator
    - - raquo ---- right-pointing double angle quotation mark - = right pointing guillemet
    - frac14 ---- vulgar fraction one quarter - = fraction one quarter
    - frac12 ---- vulgar fraction one half - = fraction one half
    - frac34 ---- vulgar fraction three quarters - = fraction three quarters
    - iquest ---- inverted question mark - = turned question mark
    - Agrave ---- latin capital letter A with grave - = latin capital letter A grave
    - - Aacute ---- latin capital letter A with acute
    - - Acirc ---- latin capital letter A with circumflex
    - - Atilde ---- latin capital letter A with tilde
    - - Auml ---- latin capital letter A with diaeresis
    - - Aring ---- latin capital letter A with ring above - = latin capital letter A ring
    - - AElig ---- latin capital letter AE - = latin capital ligature AE
    - - Ccedil ---- latin capital letter C with cedilla
    - - Egrave ---- latin capital letter E with grave
    - - Eacute ---- latin capital letter E with acute
    - - Ecirc ---- latin capital letter E with circumflex
    - - Euml ---- latin capital letter E with diaeresis
    - - Igrave ---- latin capital letter I with grave
    - - Iacute ---- latin capital letter I with acute
    - - Icirc ---- latin capital letter I with circumflex
    - - Iuml ---- latin capital letter I with diaeresis
    - - ETH ---- latin capital letter ETH
    - Ntilde ---- latin capital letter N with tilde
    - - Ograve ---- latin capital letter O with grave
    - - Oacute ---- latin capital letter O with acute
    - - Ocirc ---- latin capital letter O with circumflex
    - - Otilde ---- latin capital letter O with tilde
    - - Ouml ---- latin capital letter O with diaeresis
    - - times ---- multiplication sign
    - Oslash ---- latin capital letter O with stroke - = latin capital letter O slash
    - - Ugrave ---- latin capital letter U with grave
    - - Uacute ---- latin capital letter U with acute
    - - Ucirc ---- latin capital letter U with circumflex
    - - Uuml ---- latin capital letter U with diaeresis
    - - Yacute ---- latin capital letter Y with acute
    - - THORN ---- latin capital letter THORN
    - - szlig ---- latin small letter sharp s = ess-zed
    - - agrave ---- latin small letter a with grave - = latin small letter a grave
    - - aacute ---- latin small letter a with acute
    - - acirc ---- latin small letter a with circumflex
    - - atilde ---- latin small letter a with tilde
    - - auml ---- latin small letter a with diaeresis
    - - aring ---- latin small letter a with ring above - = latin small letter a ring
    - - aelig ---- latin small letter ae - = latin small ligature ae
    - ccedil ---- latin small letter c with cedilla
    - - egrave ---- latin small letter e with grave
    - - eacute ---- latin small letter e with acute
    - - ecirc ---- latin small letter e with circumflex
    - - euml ---- latin small letter e with diaeresis
    - - igrave ---- latin small letter i with grave
    - - iacute ---- latin small letter i with acute
    - - icirc ---- latin small letter i with circumflex
    - - iuml ---- latin small letter i with diaeresis
    - - eth ---- latin small letter eth
    - ntilde ---- latin small letter n with tilde
    - - ograve ---- latin small letter o with grave
    - - oacute ---- latin small letter o with acute
    - - ocirc ---- latin small letter o with circumflex
    - - otilde ---- latin small letter o with tilde
    - - ouml ---- latin small letter o with diaeresis
    - - divide ---- division sign
    - oslash ---- latin small letter o with stroke
    - - ugrave ---- latin small letter u with grave
    - - uacute ---- latin small letter u with acute
    - - ucirc ---- latin small letter u with circumflex
    - - uuml ---- latin small letter u with diaeresis
    - - yacute ---- latin small letter y with acute
    - - thorn ---- latin small letter thorn with
    - - yuml ---- latin small letter y with diaeresis
    - - fnof ---- latin small f with hook = function - - Alpha ---- greek capital letter alpha
    - Beta ---- greek capital letter beta
    - Gamma ---- greek capital letter gamma
    - - Delta ---- greek capital letter delta
    - - Epsilon ---- greek capital letter epsilon
    - Zeta ---- greek capital letter zeta
    - Eta ---- greek capital letter eta
    - Theta ---- greek capital letter theta
    - - Iota ---- greek capital letter iota
    - Kappa ---- greek capital letter kappa
    - Lambda ---- greek capital letter lambda
    - - Mu ---- greek capital letter mu
    - Nu ---- greek capital letter nu
    - Xi ---- greek capital letter xi
    - Omicron ---- greek capital letter omicron
    - Pi ---- greek capital letter pi
    - Rho ---- greek capital letter rho
    - Sigma ---- greek capital letter sigma
    - - Tau ---- greek capital letter tau
    - Upsilon ---- greek capital letter upsilon
    - - Phi ---- greek capital letter phi
    - - Chi ---- greek capital letter chi
    - Psi ---- greek capital letter psi
    - - Omega ---- greek capital letter omega
    - - - alpha ---- greek small letter alpha
    - - beta ---- greek small letter beta
    - gamma ---- greek small letter gamma
    - - delta ---- greek small letter delta
    - - epsilon ---- greek small letter epsilon
    - - zeta ---- greek small letter zeta
    - eta ---- greek small letter eta
    - theta ---- greek small letter theta
    - - iota ---- greek small letter iota
    - kappa ---- greek small letter kappa
    - - lambda ---- greek small letter lambda
    - - mu ---- greek small letter mu
    - nu ---- greek small letter nu
    - xi ---- greek small letter xi
    - omicron ---- greek small letter omicron
    - pi ---- greek small letter pi
    - rho ---- greek small letter rho
    - sigmaf ---- greek small letter final sigma
    - - sigma ---- greek small letter sigma
    - - tau ---- greek small letter tau
    - upsilon ---- greek small letter upsilon
    - - phi ---- greek small letter phi
    - chi ---- greek small letter chi
    - psi ---- greek small letter psi
    - omega ---- greek small letter omega
    - - thetasym ---- greek small letter theta symbol
    - upsih ---- greek upsilon with hook symbol
    - piv ---- greek pi symbol
    - - bull ---- bullet = black small circle
    - - hellip ---- horizontal ellipsis = three dot leader
    - - prime ---- prime = minutes = feet
    - Prime ---- double prime = seconds = inches
    - - oline ---- overline = spacing overscore
    - - frasl ---- fraction slash
    - - weierp ---- script capital P = power set - image ---- blackletter capital I = imaginary part
    - real ---- blackletter capital R = real part symbol
    - trade ---- trade mark sign
    - alefsym ---- alef symbol = first transfinite cardinal
    - - larr ---- leftwards arrow
    - uarr ---- upwards arrow
    - rarr ---- rightwards arrow
    - darr ---- downwards arrow
    - harr ---- left right arrow
    ISOamsa - crarr ---- downwards arrow with corner leftwards - = carriage return
    NEW - lArr ---- leftwards double arrow
    - uArr ---- upwards double arrow
    ISOamsa - rArr ---- rightwards double arrow
    - - dArr ---- downwards double arrow
    ISOamsa - hArr ---- left right double arrow
    - - forall ---- for all
    - part ---- partial differential
    - exist ---- there exists
    - empty ---- empty set = null set = diameter
    - ISOamso - nabla ---- nabla = backward difference
    - - isin ---- element of
    - notin ---- not an element of
    - ni ---- contains as member
    - prod ---- n-ary product = product sign
    - sum ---- n-ary sumation
    ISOamsb - minus ---- minus sign
    - lowast ---- asterisk operator
    - radic ---- square root = radical sign
    - - prop ---- proportional to
    - infin ---- infinity
    - ang ---- angle
    ISOamso - and ---- logical and = wedge
    - or ---- logical or = vee
    - cap ---- intersection = cap
    - cup ---- union = cup
    - int ---- integral
    - there4 ---- therefore
    - sim ---- tilde operator = varies with = similar to
    - - cong ---- approximately equal to
    - asymp ---- almost equal to = asymptotic to
    - ISOamsr - ne ---- not equal to
    - equiv ---- identical to
    - le ---- less-than or equal to
    - ge ---- greater-than or equal to
    - - sub ---- subset of
    - sup ---- superset of
    - nsub ---- not a subset of
    - sube ---- subset of or equal to
    - supe ---- superset of or equal to
    - - oplus ---- circled plus = direct sum
    - otimes ---- circled times = vector product
    - perp ---- up tack = orthogonal to = perpendicular
    - - sdot ---- dot operator
    - - lceil ---- left ceiling = apl upstile
    - rceil ---- right ceiling
    - lfloor ---- left floor = apl downstile
    - rfloor ---- right floor
    - lang ---- left-pointing angle bracket = bra
    - rang ---- right-pointing angle bracket = ket
    - - loz ---- lozenge
    - - spades ---- black spade suit
    - clubs ---- black club suit = shamrock
    - hearts ---- black heart suit = valentine
    - diams ---- black diamond suit
    - - quot ---- quotation mark = APL quote
    - amp ---- ampersand
    - lt ---- less-than sign
    - gt ---- greater-than sign
    - - OElig ---- latin capital ligature OE
    - oelig ---- latin small ligature oe
    - Scaron ---- latin capital letter S with caron
    - scaron ---- latin small letter s with caron
    - Yuml ---- latin capital letter Y with diaeresis
    - - circ ---- modifier letter circumflex accent
    - tilde ---- small tilde
    - ensp ---- en space
    - emsp ---- em space
    - thinsp ---- thin space
    - zwnj ---- zero width non-joiner
    - - zwj ---- zero width joiner
    - lrm ---- left-to-right mark
    - rlm ---- right-to-left mark
    - ndash ---- en dash
    - mdash ---- em dash
    - lsquo ---- left single quotation mark
    - - rsquo ---- right single quotation mark
    - - sbquo ---- single low-9 quotation mark
    - ldquo ---- left double quotation mark
    - - rdquo ---- right double quotation mark
    - - bdquo ---- double low-9 quotation mark
    - dagger ---- dagger
    - Dagger ---- double dagger
    - permil ---- per mille sign
    - lsaquo ---- single left-pointing angle quotation mark
    - - rsaquo ---- single right-pointing angle quotation mark
    - - (in hexadecimal) represents the same character. - (in hexadecimal) represents the same character as well. - (in decimal) represents the Cyrillic capital letter "I". - (in hexadecimal) represents the Chinese character for water. - - diff --git a/htmlparser/tests/html/entity_list.html b/htmlparser/tests/html/entity_list.html deleted file mode 100644 index 5060d70316c0..000000000000 --- a/htmlparser/tests/html/entity_list.html +++ /dev/null @@ -1,394 +0,0 @@ - nbsp   ---- no-break space = non-breaking space
    - - iexcl ¡ ---- inverted exclamation mark
    - cent ¢ ---- cent sign
    - pound £ ---- pound sign
    - curren ¤ ---- currency sign
    - yen ¥ ---- yen sign = yuan sign
    - brvbar ¦ ---- broken bar = broken vertical bar
    - - sect § ---- section sign
    - uml ¨ ---- diaeresis = spacing diaeresis
    - - copy © ---- copyright sign
    - ordf ª ---- feminine ordinal indicator
    - laquo « ---- left-pointing double angle quotation mark - = left pointing guillemet
    - not ¬ ---- not sign
    - shy ­ ---- soft hyphen = discretionary hyphen
    - - reg ® ---- registered sign = registered trade mark sign
    - - macr ¯ ---- macron = spacing macron = overline - = APL overbar
    - deg ° ---- degree sign
    - plusmn ± ---- plus-minus sign = plus-or-minus sign
    - - sup2 ² ---- superscript two = superscript digit two - = squared
    - sup3 ³ ---- superscript three = superscript digit three - = cubed
    - acute ´ ---- acute accent = spacing acute
    - - micro µ ---- micro sign
    - para ¶ ---- pilcrow sign = paragraph sign
    - - middot · ---- middle dot = Georgian comma - = Greek middle dot
    - cedil ¸ ---- cedilla = spacing cedilla
    - sup1 ¹ ---- superscript one = superscript digit one
    - - ordm º ---- masculine ordinal indicator
    - - raquo » ---- right-pointing double angle quotation mark - = right pointing guillemet
    - frac14 ¼ ---- vulgar fraction one quarter - = fraction one quarter
    - frac12 ½ ---- vulgar fraction one half - = fraction one half
    - frac34 ¾ ---- vulgar fraction three quarters - = fraction three quarters
    - iquest ¿ ---- inverted question mark - = turned question mark
    - Agrave À ---- latin capital letter A with grave - = latin capital letter A grave
    - - Aacute Á ---- latin capital letter A with acute
    - - Acirc  ---- latin capital letter A with circumflex
    - - Atilde à ---- latin capital letter A with tilde
    - - Auml Ä ---- latin capital letter A with diaeresis
    - - Aring Å ---- latin capital letter A with ring above - = latin capital letter A ring
    - - AElig Æ ---- latin capital letter AE - = latin capital ligature AE
    - - Ccedil Ç ---- latin capital letter C with cedilla
    - - Egrave È ---- latin capital letter E with grave
    - - Eacute É ---- latin capital letter E with acute
    - - Ecirc Ê ---- latin capital letter E with circumflex
    - - Euml Ë ---- latin capital letter E with diaeresis
    - - Igrave Ì ---- latin capital letter I with grave
    - - Iacute Í ---- latin capital letter I with acute
    - - Icirc Î ---- latin capital letter I with circumflex
    - - Iuml Ï ---- latin capital letter I with diaeresis
    - - ETH Ð ---- latin capital letter ETH
    - Ntilde Ñ ---- latin capital letter N with tilde
    - - Ograve Ò ---- latin capital letter O with grave
    - - Oacute Ó ---- latin capital letter O with acute
    - - Ocirc Ô ---- latin capital letter O with circumflex
    - - Otilde Õ ---- latin capital letter O with tilde
    - - Ouml Ö ---- latin capital letter O with diaeresis
    - - times × ---- multiplication sign
    - Oslash Ø ---- latin capital letter O with stroke - = latin capital letter O slash
    - - Ugrave Ù ---- latin capital letter U with grave
    - - Uacute Ú ---- latin capital letter U with acute
    - - Ucirc Û ---- latin capital letter U with circumflex
    - - Uuml Ü ---- latin capital letter U with diaeresis
    - - Yacute Ý ---- latin capital letter Y with acute
    - - THORN Þ ---- latin capital letter THORN
    - - szlig ß ---- latin small letter sharp s = ess-zed
    - - agrave à ---- latin small letter a with grave - = latin small letter a grave
    - - aacute á ---- latin small letter a with acute
    - - acirc â ---- latin small letter a with circumflex
    - - atilde ã ---- latin small letter a with tilde
    - - auml ä ---- latin small letter a with diaeresis
    - - aring å ---- latin small letter a with ring above - = latin small letter a ring
    - - aelig æ ---- latin small letter ae - = latin small ligature ae
    - ccedil ç ---- latin small letter c with cedilla
    - - egrave è ---- latin small letter e with grave
    - - eacute é ---- latin small letter e with acute
    - - ecirc ê ---- latin small letter e with circumflex
    - - euml ë ---- latin small letter e with diaeresis
    - - igrave ì ---- latin small letter i with grave
    - - iacute í ---- latin small letter i with acute
    - - icirc î ---- latin small letter i with circumflex
    - - iuml ï ---- latin small letter i with diaeresis
    - - eth ð ---- latin small letter eth
    - ntilde ñ ---- latin small letter n with tilde
    - - ograve ò ---- latin small letter o with grave
    - - oacute ó ---- latin small letter o with acute
    - - ocirc ô ---- latin small letter o with circumflex
    - - otilde õ ---- latin small letter o with tilde
    - - ouml ö ---- latin small letter o with diaeresis
    - - divide ÷ ---- division sign
    - oslash ø ---- latin small letter o with stroke
    - - ugrave ù ---- latin small letter u with grave
    - - uacute ú ---- latin small letter u with acute
    - - ucirc û ---- latin small letter u with circumflex
    - - uuml ü ---- latin small letter u with diaeresis
    - - yacute ý ---- latin small letter y with acute
    - - thorn þ ---- latin small letter thorn with
    - - yuml ÿ ---- latin small letter y with diaeresis
    - - fnof ƒ ---- latin small f with hook = function - - Alpha Α ---- greek capital letter alpha
    - Beta Β ---- greek capital letter beta
    - Gamma Γ ---- greek capital letter gamma
    - - Delta Δ ---- greek capital letter delta
    - - Epsilon Ε ---- greek capital letter epsilon
    - Zeta Ζ ---- greek capital letter zeta
    - Eta Η ---- greek capital letter eta
    - Theta Θ ---- greek capital letter theta
    - - Iota Ι ---- greek capital letter iota
    - Kappa Κ ---- greek capital letter kappa
    - Lambda Λ ---- greek capital letter lambda
    - - Mu Μ ---- greek capital letter mu
    - Nu Ν ---- greek capital letter nu
    - Xi Ξ ---- greek capital letter xi
    - Omicron Ο ---- greek capital letter omicron
    - Pi Π ---- greek capital letter pi
    - Rho Ρ ---- greek capital letter rho
    - Sigma Σ ---- greek capital letter sigma
    - - Tau Τ ---- greek capital letter tau
    - Upsilon Υ ---- greek capital letter upsilon
    - - Phi Φ ---- greek capital letter phi
    - - Chi Χ ---- greek capital letter chi
    - Psi Ψ ---- greek capital letter psi
    - - Omega Ω ---- greek capital letter omega
    - - - alpha α ---- greek small letter alpha
    - - beta β ---- greek small letter beta
    - gamma γ ---- greek small letter gamma
    - - delta δ ---- greek small letter delta
    - - epsilon ε ---- greek small letter epsilon
    - - zeta ζ ---- greek small letter zeta
    - eta η ---- greek small letter eta
    - theta θ ---- greek small letter theta
    - - iota ι ---- greek small letter iota
    - kappa κ ---- greek small letter kappa
    - - lambda λ ---- greek small letter lambda
    - - mu μ ---- greek small letter mu
    - nu ν ---- greek small letter nu
    - xi ξ ---- greek small letter xi
    - omicron ο ---- greek small letter omicron
    - pi π ---- greek small letter pi
    - rho ρ ---- greek small letter rho
    - sigmaf ς ---- greek small letter final sigma
    - - sigma σ ---- greek small letter sigma
    - - tau τ ---- greek small letter tau
    - upsilon υ ---- greek small letter upsilon
    - - phi φ ---- greek small letter phi
    - chi χ ---- greek small letter chi
    - psi ψ ---- greek small letter psi
    - omega ω ---- greek small letter omega
    - - thetasym ϑ ---- greek small letter theta symbol
    - upsih ϒ ---- greek upsilon with hook symbol
    - piv ϖ ---- greek pi symbol
    - - bull • ---- bullet = black small circle
    - - hellip … ---- horizontal ellipsis = three dot leader
    - - prime ′ ---- prime = minutes = feet
    - Prime ″ ---- double prime = seconds = inches
    - - oline ‾ ---- overline = spacing overscore
    - - frasl ⁄ ---- fraction slash
    - - weierp ℘ ---- script capital P = power set - image ℑ ---- blackletter capital I = imaginary part
    - real ℜ ---- blackletter capital R = real part symbol
    - trade ™ ---- trade mark sign
    - alefsym ℵ ---- alef symbol = first transfinite cardinal
    - - larr ← ---- leftwards arrow
    - uarr ↑ ---- upwards arrow
    - rarr → ---- rightwards arrow
    - darr ↓ ---- downwards arrow
    - harr ↔ ---- left right arrow
    ISOamsa - crarr ↵ ---- downwards arrow with corner leftwards - = carriage return
    NEW - lArr ⇐ ---- leftwards double arrow
    - uArr ⇑ ---- upwards double arrow
    ISOamsa - rArr ⇒ ---- rightwards double arrow
    - - dArr ⇓ ---- downwards double arrow
    ISOamsa - hArr ⇔ ---- left right double arrow
    - - forall ∀ ---- for all
    - part ∂ ---- partial differential
    - exist ∃ ---- there exists
    - empty ∅ ---- empty set = null set = diameter
    - ISOamso - nabla ∇ ---- nabla = backward difference
    - - isin ∈ ---- element of
    - notin ∉ ---- not an element of
    - ni ∋ ---- contains as member
    - prod ∏ ---- n-ary product = product sign
    - sum ∑ ---- n-ary sumation
    ISOamsb - minus − ---- minus sign
    - lowast ∗ ---- asterisk operator
    - radic √ ---- square root = radical sign
    - - prop ∝ ---- proportional to
    - infin ∞ ---- infinity
    - ang ∠ ---- angle
    ISOamso - and ∧ ---- logical and = wedge
    - or ∨ ---- logical or = vee
    - cap ∩ ---- intersection = cap
    - cup ∪ ---- union = cup
    - int ∫ ---- integral
    - there4 ∴ ---- therefore
    - sim ∼ ---- tilde operator = varies with = similar to
    - - cong ≅ ---- approximately equal to
    - asymp ≈ ---- almost equal to = asymptotic to
    - ISOamsr - ne ≠ ---- not equal to
    - equiv ≡ ---- identical to
    - le ≤ ---- less-than or equal to
    - ge ≥ ---- greater-than or equal to
    - - sub ⊂ ---- subset of
    - sup ⊃ ---- superset of
    - nsub ⊄ ---- not a subset of
    - sube ⊆ ---- subset of or equal to
    - supe ⊇ ---- superset of or equal to
    - - oplus ⊕ ---- circled plus = direct sum
    - otimes ⊗ ---- circled times = vector product
    - perp ⊥ ---- up tack = orthogonal to = perpendicular
    - - sdot ⋅ ---- dot operator
    - - lceil ⌈ ---- left ceiling = apl upstile
    - rceil ⌉ ---- right ceiling
    - lfloor ⌊ ---- left floor = apl downstile
    - rfloor ⌋ ---- right floor
    - lang 〈 ---- left-pointing angle bracket = bra
    - rang 〉 ---- right-pointing angle bracket = ket
    - - loz ◊ ---- lozenge
    - - spades ♠ ---- black spade suit
    - clubs ♣ ---- black club suit = shamrock
    - hearts ♥ ---- black heart suit = valentine
    - diams ♦ ---- black diamond suit
    - - quot " ---- quotation mark = APL quote
    - amp & ---- ampersand
    - lt < ---- less-than sign
    - gt > ---- greater-than sign
    - - OElig Π---- latin capital ligature OE
    - oelig œ ---- latin small ligature oe
    - Scaron Š ---- latin capital letter S with caron
    - scaron š ---- latin small letter s with caron
    - Yuml Ÿ ---- latin capital letter Y with diaeresis
    - - circ ˆ ---- modifier letter circumflex accent
    - tilde ˜ ---- small tilde
    - ensp   ---- en space
    - emsp   ---- em space
    - thinsp   ---- thin space
    - zwnj ‌ ---- zero width non-joiner
    - - zwj ‍ ---- zero width joiner
    - lrm ‎ ---- left-to-right mark
    - rlm ‏ ---- right-to-left mark
    - ndash – ---- en dash
    - mdash — ---- em dash
    - lsquo ‘ ---- left single quotation mark
    - - rsquo ’ ---- right single quotation mark
    - - sbquo ‚ ---- single low-9 quotation mark
    - ldquo “ ---- left double quotation mark
    - - rdquo ” ---- right double quotation mark
    - - bdquo „ ---- double low-9 quotation mark
    - dagger † ---- dagger
    - Dagger ‡ ---- double dagger
    - permil ‰ ---- per mille sign
    - lsaquo ‹ ---- single left-pointing angle quotation mark
    - - rsaquo › ---- single right-pointing angle quotation mark
    - - å (in hexadecimal) represents the same character. - å (in hexadecimal) represents the same character as well. - И (in decimal) represents the Cyrillic capital letter "I". - 水 (in hexadecimal) represents the Chinese character for water. diff --git a/htmlparser/tests/html/fieldset001.html b/htmlparser/tests/html/fieldset001.html deleted file mode 100644 index cfc2c115432f..000000000000 --- a/htmlparser/tests/html/fieldset001.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - test filedset001.html - - -fieldset and legend -Text after form -
    - Text before fieldset -
    - Personal Information - Last Name: -
    First Name: -
    Address: -
    ...more personal information... -
    - Text after fieldset -
    -Text after form - - \ No newline at end of file diff --git a/htmlparser/tests/html/fieldset002.html b/htmlparser/tests/html/fieldset002.html deleted file mode 100644 index fb0ab8ce622f..000000000000 --- a/htmlparser/tests/html/fieldset002.html +++ /dev/null @@ -1,56 +0,0 @@ - - - - test filedset001.html - - -fieldset and legend, sample from
    -http://www.w3.org/TR/WD-html40/interact/forms.html#edef-FIELDSET -
    Text after form - -
    -
    - Personal Information - Last Name: - First Name: - Address: - ...more personal information... -
    -
    - Medical History - Smallpox - Mumps - Dizziness - Sneezing - ...more medical history... -
    -
    - Current Medication - Are you currently taking any medication? - Yes - No - - If you are currently taking medication, please indicate - it in the space below: - -
    -
    - -Text after form - - \ No newline at end of file diff --git a/htmlparser/tests/html/fieldset003.html b/htmlparser/tests/html/fieldset003.html deleted file mode 100644 index a4504055b954..000000000000 --- a/htmlparser/tests/html/fieldset003.html +++ /dev/null @@ -1,26 +0,0 @@ - - - - test filedset001.html - - -fieldset can be nested
    -http://www.w3.org/TR/WD-html40/interact/forms.html#edef-FIELDSET -
    Text after form - -
    -
    - Personal Information -
    - Name - Last Name: - First Name: -
    - Address: - ...more personal information... -
    -
    - -Text after form - - \ No newline at end of file diff --git a/htmlparser/tests/html/form001.html b/htmlparser/tests/html/form001.html deleted file mode 100644 index 75501cd67f16..000000000000 --- a/htmlparser/tests/html/form001.html +++ /dev/null @@ -1,13 +0,0 @@ - - - test DEL tag - - - test Form
    -
    - Please input user name -
    - - 2normal text in body line 2. - - diff --git a/htmlparser/tests/html/form002.html b/htmlparser/tests/html/form002.html deleted file mode 100644 index 8eb1c1479c5a..000000000000 --- a/htmlparser/tests/html/form002.html +++ /dev/null @@ -1,22 +0,0 @@ - - - test DEL tag - - - test Form
    - Forms cannot be nested. form tags outside form are ignored. -
    - Please input user name - - - inner open tag ignored. - Please input password - -
    - form terminated by inner end tag. - input 3 - - The outer end tag ignored. -
    2normal text in body line 2. - - diff --git a/htmlparser/tests/html/head01.html b/htmlparser/tests/html/head01.html deleted file mode 100644 index 325c59422374..000000000000 --- a/htmlparser/tests/html/head01.html +++ /dev/null @@ -1,9 +0,0 @@ - - - this is title. -text in head. - - -text - - \ No newline at end of file diff --git a/htmlparser/tests/html/head02.html b/htmlparser/tests/html/head02.html deleted file mode 100644 index bd39951c150a..000000000000 --- a/htmlparser/tests/html/head02.html +++ /dev/null @@ -1,11 +0,0 @@ - - - this is title. -text in head. - - -text1 - -text2 - - \ No newline at end of file diff --git a/htmlparser/tests/html/head03.html b/htmlparser/tests/html/head03.html deleted file mode 100644 index 442c9835f003..000000000000 --- a/htmlparser/tests/html/head03.html +++ /dev/null @@ -1,9 +0,0 @@ - - -TITLE-title03 - - -The HEAD tag has attributes. -
    The Title tag can also has attributes. - - \ No newline at end of file diff --git a/htmlparser/tests/html/home01.html b/htmlparser/tests/html/home01.html deleted file mode 100644 index b5db1eb7ad67..000000000000 --- a/htmlparser/tests/html/home01.html +++ /dev/null @@ -1,976 +0,0 @@ - - - -Welcome to Netscape - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -Welcome to Netscape - Navigation Banner - - - - - - - - - - -
    -
    -
    -
    - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    - - September 22, 1997 -
    - - - - - - - - -Publishing Suite Now Available - - - - - - - - -
      - - - - - - - Netscape Publishing Suite - the all-in-one solution for creating and publishing web sites - is now available in retail stores nationwide. - - -
    -
    - ABC News -
    - - - - - -Netscape delivers the new CommerceXpert family of Internet commerce products developed by Actra - including PublishingXpert 2.0, SellerXpert, and ECXpert. - - - -
    - - - - - - -Netscape launches a preview of Industry Watch by Individual, a personalized service providing in-depth business news and information. - - - - -
    - - - - - - - - - -Download Netscape French Communicator 4.03 for Windows 95 and NT - and take advantage of dynamic web content. - - - - -
    - - - - - - - - -Netscape works with leading sales force automation vendors to deliver next-generation tools that will automate and manage sales environments. - - -
    - - - - - More news... - - Go to a non-layers version of this page. - - - -
    -
    -
    - - -
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
      -
    - - -Jim Barksdale - -
    -
    Netscape
    Columns

    - -In this week's The Main Thing column, Jim Barksdale talks about directory services. Read all the Netscape Columns to keep up with Internet issues and trends.

    - -
    - - -

    -  - - -Tune Up to Communicator -
    -
    -Netscape
    Products

    - -Tune Up to Communicator
    -Get Any Netscape Product
    -For Subscribers Only

    -

    -And tune up your Internet connection with ISP Select. - -
    -

    -  - - -
    -
    Netscape
    Store Special

    - - - - -Looking for a rich Internet experience? Purchase Netscape Communicator Deluxe Edition and get a $30 rebate. Plus save $10 on the Official Netscape Communicator 4.0 Professional Edition book. - - - -

    -

    -  -
    - - - - - - - -
    -  - -

    - - - -
    - - - - -
    -
    Corporate Sales: 650/937-2555 Personal Sales: 650/937-3777
    -Government Sales: 650/937-3678
    If you have any questions please visit -Customer Service or contact your nearest -sales office. • Copyright © 1997 -Netscape Communications -Corporation. • This site powered by Netscape SuiteSpot servers.
    - -

    -
    -

    - - -
    - - - - - - - -New to the Web? - - - - - - - - - - - - -Turn Your In-Box Into a News Center - - -Netcenter - - - - - - - -In-Box Direct Delivers The New York Times - - - - -

    -Get the best information on the Web without searching. Netscape In-Box Direct offers rich HTML content from USA Today, the Wall Street Journal, Sports Illustrated, ELLE International, and dozens more delivered directly to your email in-box. Choose the publications you want, and Netscape In-Box Direct will take care of the rest. -

    - - - - -Win a BMW Z3 1.9 Roadster - -
    - -Sign up for Netcenter today and register to win a BMW Z3 1.9 Roadster, PC workstations from Hewlett-Packard, trips from Travelocity and Continental Airlines, and music from CDnow. -

    - - -Software Store - -
    - -Looking for new collaboration software? Web development tools? Netscape's Software Store has all your business solutions software. - -

    - -Netscape Guide by Yahoo! - -
    - -Want a hot stock tip? Head over to the finance section of Netscape Guide by Yahoo! for a rich assortment of money management and investment resources. - - - - - - - - - -Tune in to Netcenter, the Best of the Net for: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    - - Community - -
    - - - - - Virtual Office - -

    -

    - -
    - Software -
    -
    - - - - SmartUpdate - -
    - - - - Software Store -

    - -

    - -
    - Content -
    -
    - - - -Industry Watch - -
    - - - Channel Finder - -
    - - - In-Box Direct - -
    - - - - Internet Guide - -
    - - - - Net Search -

    - -

    - -
    - - Available Soon - -
    -
    - - - - Professional Community - -
    - -
    - - - - - -

    Netcenter
    - - - - -
    - - - - - - - - diff --git a/htmlparser/tests/html/html001.html b/htmlparser/tests/html/html001.html deleted file mode 100644 index db589d3920ba..000000000000 --- a/htmlparser/tests/html/html001.html +++ /dev/null @@ -1,8 +0,0 @@ - - - HTML-html04 - - -html tag has attributes. - - \ No newline at end of file diff --git a/htmlparser/tests/html/imgmap001.html b/htmlparser/tests/html/imgmap001.html deleted file mode 100644 index b826c9a4e969..000000000000 --- a/htmlparser/tests/html/imgmap001.html +++ /dev/null @@ -1,13 +0,0 @@ - - - - -

    area element with attributes set for circular shape of 80,80,60.

    - image for imagemap - - imagemap - - - diff --git a/htmlparser/tests/html/ins001.html b/htmlparser/tests/html/ins001.html deleted file mode 100644 index b7be3a2edd31..000000000000 --- a/htmlparser/tests/html/ins001.html +++ /dev/null @@ -1,14 +0,0 @@ - - -test INS tag - - -test INS, bi directional override
    -1normal text in body line 1. - -A This text is in INS -
    B line 2 in INS -
    -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/ins002.html b/htmlparser/tests/html/ins002.html deleted file mode 100644 index c759228945cf..000000000000 --- a/htmlparser/tests/html/ins002.html +++ /dev/null @@ -1,18 +0,0 @@ - - -test INS tag - - -test INS, bi directional override
    -1normal text in body line 1. - -A This text is in INS - - C This text is in INS2 -
    D line 2 in INS2 -
    -
    B line 2 in INS -
    -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/ins003.html b/htmlparser/tests/html/ins003.html deleted file mode 100644 index 8574eaef4b50..000000000000 --- a/htmlparser/tests/html/ins003.html +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - del strikeover text - - -Normal text1 -
    text with underline -
    Normal text2 - - diff --git a/htmlparser/tests/html/insdel01.html b/htmlparser/tests/html/insdel01.html deleted file mode 100644 index 9aa6ba964eaf..000000000000 --- a/htmlparser/tests/html/insdel01.html +++ /dev/null @@ -1,16 +0,0 @@ - - -test INS tag - - -test INS
    -1normal text in body line 1. - -A This text is in INS - -
    B line 2 in INS -
    -
    -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/insdel02.html b/htmlparser/tests/html/insdel02.html deleted file mode 100644 index 4e21163812e0..000000000000 --- a/htmlparser/tests/html/insdel02.html +++ /dev/null @@ -1,16 +0,0 @@ - - -INS DEL /INS /DEL crossing scope - - -test INS
    -1normal text in body line 1. - -A This text is in INS - -
    B line 2 in INS -
    -2normal text in body line 2. - - - diff --git a/htmlparser/tests/html/java.html b/htmlparser/tests/html/java.html deleted file mode 100644 index 4df50a002dba..000000000000 --- a/htmlparser/tests/html/java.html +++ /dev/null @@ -1,25 +0,0 @@ - - - -  - - - - - - - </COMMENT> - - - - - - - - - - - </COMMENT> - - - diff --git a/htmlparser/tests/html/layer001.html b/htmlparser/tests/html/layer001.html deleted file mode 100644 index dc9790698fb2..000000000000 --- a/htmlparser/tests/html/layer001.html +++ /dev/null @@ -1,20 +0,0 @@ - - - - -Test Layer - - - - -Layers do NOT imply new paragraph, nor line break. - - -Note, no line break here. Text-2 - - - -After layer, text-3 - - - diff --git a/htmlparser/tests/html/layer002.html b/htmlparser/tests/html/layer002.html deleted file mode 100644 index 012d84451a9e..000000000000 --- a/htmlparser/tests/html/layer002.html +++ /dev/null @@ -1,28 +0,0 @@ - - - - -Test Layer002 - - - - -In Vav4.0, SPAN tags go thour layers. -
    It remain in effect in its whole scope, including layer. - -Text in color=red SPAN. - - - -Text between 2 LAYERs. - - - - -

    -Text not in layer, but still in SPAN -

    -
    -Text after closing font tag. - - diff --git a/htmlparser/tests/html/layer003.html b/htmlparser/tests/html/layer003.html deleted file mode 100644 index 846a4def75ef..000000000000 --- a/htmlparser/tests/html/layer003.html +++ /dev/null @@ -1,29 +0,0 @@ - - - - -Test Layer003 - - - - -In Vav4.0, layers are in the same "span space" as their parents. -
    That means spans can be closed inside layer, and affect layers' parents. - - -Text in color=red SPAN. - - - -Text between 2 LAYERs. - - - - -

    -Text not in layer -

    - - - - diff --git a/htmlparser/tests/html/layer01.html b/htmlparser/tests/html/layer01.html deleted file mode 100644 index 0a503a7eb4c5..000000000000 --- a/htmlparser/tests/html/layer01.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - -Welcome to Netscape - - - - -Text-1 - - - -
    Text-2 - - - -

    -no-layer-text-3 - - - diff --git a/htmlparser/tests/html/list001.html b/htmlparser/tests/html/list001.html deleted file mode 100644 index e2d549c797a6..000000000000 --- a/htmlparser/tests/html/list001.html +++ /dev/null @@ -1,33 +0,0 @@ - -Welcome to Hewlett-Packard - - - -This file was created on 9/16/97 for bug #85117, assert in parser -when closing dd tag. - -
    This is from the end of the page www.hp.com -
    Note, an optional closing dd tag is missing. - -

  • - -
    - -
    [Search HP] - -[Top] -[Contact HP] -[Copyright] - - -
    - -
    - - -Note: no line break here. - -
    Now wrapped with a PRE tag. -
    -first line
    -
    invalide tags are ignored by 4.0 -
    - - -Note: no line break here. - \ No newline at end of file diff --git a/htmlparser/tests/html/q001.html b/htmlparser/tests/html/q001.html deleted file mode 100644 index e7cd998188bd..000000000000 --- a/htmlparser/tests/html/q001.html +++ /dev/null @@ -1,18 +0,0 @@ - - -test BLOCKQUOTE and Q tag - - -test BLOCKQUOTE and Q tag
    -1normal text in body line 1. -
    -A This text is in blockquote -
    B line 2 -
    -2normal text in body line 2. - -this text is in Q tag - -3normal text in body line 3. - - diff --git a/htmlparser/tests/html/quote001.html b/htmlparser/tests/html/quote001.html deleted file mode 100644 index 6b591b9193cb..000000000000 --- a/htmlparser/tests/html/quote001.html +++ /dev/null @@ -1,51 +0,0 @@ - - - HTML test for quoted string - - - - This was created on 9/12/97, for Xena bug # 85121 -
    Testing the quoted value string. -
    tag A is used so you can check the string by moving mouse on the link. -
    use view source or text editor to compare with browser display. - -
    The goal is to be compatible with Navigator 4.0 -
    Test results are compared with Nav4.0 display. Relavent Nav4.0 source - files are ns\lib\libparse\pa_parse.c and others. -
    Differences between 4.0 and xena are indecated. Search for XENA60 - to find all the defferences. - -

    comments from ns\xena\lego\src\lego\html\scanner\HTMLScanner.java: -
    // For quoted value string : -
    // Double/single quote only take effect as the first char -
    // of the value string, -
    // Quoted string is terminated by the second double/single quote -
    // respectively. -
    // In other places, double/single quote is treated literally. -
    // Entity & quot; is always treated literally, enven it is the -
    // first char. -
    // -
    // Unquoted value string is terminated by while space, or '>' sign. -

    - - Test lines: - -
    10(quote / quote ) good syntax: both quotes striped from string -
    20( / ) value not quoted, not recommented, but still good syntax. -
    30 - Navigator 4.0 value string trunketed at 82 characters XENA60 limit is MAX_STRING_LENGTH = 2000. - -
    40 The following are illegal HTML, but handled by Nav4.0 in different ways. -
    50( / ) string not quoted, whight space terminates the value -
    60( / ) string not quoted, CR terminates the string -
    70(escaped-quote / quote ) both quotes remain in string -
    80(quote / ) - move mouse on this. If we didn't have a quote in the text, the value string would - run through the end of the A tag. -
    90(quote / escaped-quote ) same as above, escaped quote does NOT terminate quote -
    100( / quote) if no opening quote, quotes are included in string -
    110( / quote) no opening quote, value terminated at right brack - - - diff --git a/htmlparser/tests/html/quote002.html b/htmlparser/tests/html/quote002.html deleted file mode 100644 index 182be7f50452..000000000000 --- a/htmlparser/tests/html/quote002.html +++ /dev/null @@ -1,33 +0,0 @@ - - - HTML test for quoted string - - - - This was created on 9/15/97, for Xena bug # 85121 -
    Testing the quoted value string. -
    tag A is used so you can check the string by moving mouse on the link. -
    use view source or text editor to compare with browser display. -
    10 2 quoted string stick together -
    20 2 quoted string separated by space -
    30 Navigator doesn't support nested quote. -
    40 quoted string can have escaped quote. -
    50 unquoted string can have escaped quote. -
    60 unquoted string can have other entities. -
    70 entity missing ';', ended by > -
    80 entity missing ';', not treated as entity inside a value word. -
    90 ending entity with a space, missing ';', value terminated. -
    100 ending entity with a '=', missing ';', value not terminated. -
    110 unquoted script is treated as text. -
    120 Navigator 4.0 does not support script - entity in quoted string. - - -
    130 this is fisrt quote " and second quote " in text area. -
    140 this is fisrt escaped-quote " and second escaped-quote " in text area. - -
    -
    In the following test line, quote is the last char in the html file. -
    Navigator 4.0 displace the line as text. -
    XENA60 appends a closing quote and display it as link with empty href="". -
    150 - - - - - - ResidualStyle - - -  -
    - - - -
    Residual Style 
    - -

    This page is the topmost page for -testing residual style. Residual style handling refers to automatic document -conversions performed by Gecko when encounters mal-formed HTML fontstyle -elements (explicit style tags -- not CSS). For example: -

    <html><body><b><div>text</div></b></body></html> -
      -
      - - - - - - - - - - - - -
    -
    Expected Result
    -
    -
    Result defined by HTML4.0
    -
    div text herediv text here
    - -

    According to the spec, <b> is -a fontstyle element, whereas <div> is a block element. As a rule, fontstyle -tags can't contain block elements, which means that the preceeding example -is malformed. In order to remain backward compatible and simultaneously -maintain well-formed documents, Gecko may have to rewrite the document -to be correct. The prior example is rewritten by Gecko like this: -

    <html><body><div><b>text</b></div></body></html> -
      -
      - - - - -
    How To Create Your Own Tests
    - -

    You can make your own residual style -tests either manually, or by using the htmlgen program that is checked -into the tree at mozilla\htmlparser\htmlgen. Most residual style problems -fall into one of two categories: 1) illegal containment; 2) malformed closures. -Note that these problems can exist for fontstyle, phrasal and special tags -(see the list below). Also note that navigator behavior is very inconsistent -with respect to these problems, so don't assume that one testcase covers -all cases. -

    Illegal Containment Sample -
    This example shows a <b> containing -a <div>, which is illegal under HTML. -

    <html><body><b><div>text</div></b></body></html> -

    Malformed Containment Sample -
    This example shows misplaced close -tags, which often wreak havoc with style in navigator. -

    <html><body><b><div>text1</i>text2</div></body></html> -
      -
      - - - - - - - - - - - - - - - - - - -
    List of FontStyle Tags: <b>, <big>, <i>, <s>, -<small>, <strike>, <tt>, <u>
    List of Phrase Tags:<abbr>, <acronym>, <cite>, -<code>, <dfn>, <em>, <kbd>, <samp>, <strong>, <var>
    List of Special Tags:<a>, <applet>, <basefont>, -<bdo>, <br>, <font>, <iframe>, <img>, <obect>, <map>, -<q>, <script>, <span>, <sub>, <sup>
    - -
      -
      - - - - -
    Residual Style Testcases
    - -

    The following testcases were taken -from real bugs. In most cases, these have been reduced to a minimum sample, -and they have been linked together so that you compare them in sequence -against other browsers. -

    Click on any of the following links -to begin... -
      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Bug466Bug991Bug1239Bug1259Bug2419Bug2447Bug3073
    Bug4814Bug4809Bug4958Bug5859Bug6233Bug6925Bug7447
    Bug7723Bug7724Bug7823Bug7889Bug8080Bug8056Bug8681
    Bug8738Bug8771Bug8913Bug8996Bug9563Bug10049Bug10324
    Bug11381Bug12118Bug12632Bug12468Bug13107Bug14276Bug14636
    Bug14981Bug18159Bug18185Bug18403Bug18865Bug19172Bug19194
    Bug20030Bug20178Bug20199Bug21186Bug21318Bug21424Bug21689
    Bug21692Bug21779Bug22025Bug22142Bug22157Bug23529Bug23680
    Bug23780Bug23831Bug24003
    - -

    -


    - - - - - - -
    Residual style: This term describes the case where -malformed fontstyle elements that get closed out by block elements need -to be reopened later in the document. The term can about for two reasons: -1) because style gets reopened "residually" -- later in the document; 2) -because explicit style tags tend to leave an unpleasant "residue" on html -documents.
    - - - diff --git a/htmlparser/tests/html/span001.html b/htmlparser/tests/html/span001.html deleted file mode 100644 index e0a09c843a82..000000000000 --- a/htmlparser/tests/html/span001.html +++ /dev/null @@ -1,30 +0,0 @@ - - - - -Test SPAN001 - - - - -

    -In HTML standard, SPANs are not allowed to cross Paragraphs. -
    In Vav4.0, SPAN tags go thour paragraphs and other block-level elements, -except tbles. -
    It be compatable with Nav4.0 and HTML standard, HTMLParser -close all SPANs before open a new paragraph, and reopen SPANs -inside the new paragraph -
    - - - Text ater the color=red SPAN is opend. -

    - Text in a new paragraph. -

    -

    - Text after the new paragrapg is closed, still in color=red SPAN. - -
    -Text ater the color=red SPAN is closed. - - diff --git a/htmlparser/tests/html/span002.html b/htmlparser/tests/html/span002.html deleted file mode 100644 index 5f3c497ff843..000000000000 --- a/htmlparser/tests/html/span002.html +++ /dev/null @@ -1,21 +0,0 @@ - - - - -Test SPAN001 - - - - -Normal text - -Text after the color=red SPAN is opend. -

    - Text in a new paragraph. -

    - Text after the new paragrapg is closed, still in color=red SPAN. - -Text ater the color=red SPAN is closed. -
    another line. - - diff --git a/htmlparser/tests/html/span003.html b/htmlparser/tests/html/span003.html deleted file mode 100644 index c4ea303d8c97..000000000000 --- a/htmlparser/tests/html/span003.html +++ /dev/null @@ -1,17 +0,0 @@ - - - - -Test SPAN001 - - - - -

    - - bold - italic - B closed(but actualy it closed the last span tag, which is I tag. - I closed - - diff --git a/htmlparser/tests/html/strike002.html b/htmlparser/tests/html/strike002.html deleted file mode 100644 index 65d505d595a8..000000000000 --- a/htmlparser/tests/html/strike002.html +++ /dev/null @@ -1,17 +0,0 @@ - - -Nested STRIKE - - -nested STRIKE
    -1normal text in body line 1. - -A This text is in STRIKE - -
    B line 2 in STRIKE -
    -C line 3 in STRIKE -
    -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/strike005.html b/htmlparser/tests/html/strike005.html deleted file mode 100644 index 4040627aca80..000000000000 --- a/htmlparser/tests/html/strike005.html +++ /dev/null @@ -1,22 +0,0 @@ - - -strikeover a whole TABLE - - -strikeover a whole TABLE, no visual effect, no strikeover on text in table
    -1normal text in body line 1. - -text with strikeover - - - -
    This is the first cell in the table - This is the second cell in the table -
    This is the third cell in the table - This is the fourth cell in the table -
    -text still inside strikeover. -
    -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/strike006.html b/htmlparser/tests/html/strike006.html deleted file mode 100644 index 979198c12b3d..000000000000 --- a/htmlparser/tests/html/strike006.html +++ /dev/null @@ -1,18 +0,0 @@ - - -strikeover in a table cell TD - - -strikeover in a table cell TD
    -1normal text in body line 1. - - - -
    This is the first cell in the table - This is strikeover -
    This is the third cell in the table - This is the fourth cell in the table -
    -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/strike007.html b/htmlparser/tests/html/strike007.html deleted file mode 100644 index eaa80db9961a..000000000000 --- a/htmlparser/tests/html/strike007.html +++ /dev/null @@ -1,20 +0,0 @@ - - -strikeover a whole table cell TD - - -strikeover a whole table cell TD -1normal text in body line 1. - - - - - -
    This is the first cell in the table - - This cell is in strikeover
    This is the third cell in the table - This is the fourth cell in the table -
    -2normal text in body line 2. - - diff --git a/htmlparser/tests/html/strike008.html b/htmlparser/tests/html/strike008.html deleted file mode 100644 index cc95b6ebf47e..000000000000 --- a/htmlparser/tests/html/strike008.html +++ /dev/null @@ -1,26 +0,0 @@ - - -strikeover cross scope with a table - - -

    strikeover cross scope with a table
    -It starts before table, but ends inside table. -
    It does not effect the table, but the strikeover is terminated after the table. -
    1normal text in body line 1. - -text in strikeover. - - - - - -
    This is the first cell in the table - This cell 2
    This is the third cell in the table - This is the fourth cell in the table -
    -

    -text not in strikeover - -
    2normal text in body line 2. - - diff --git a/htmlparser/tests/html/table05a.html b/htmlparser/tests/html/table05a.html deleted file mode 100644 index a6df8ca395eb..000000000000 --- a/htmlparser/tests/html/table05a.html +++ /dev/null @@ -1,14 +0,0 @@ -Caption before end of table -
    Nav 4.0: caption is accepted. (terminate tr) -
    Xena6.0: ignore misplaced caption tag. - - - - - -
    cell 1-1 - cell 1-2 -
    cell 2-1 - cell 2-2
    caption1
    - -last text diff --git a/htmlparser/tests/html/table05b.html b/htmlparser/tests/html/table05b.html deleted file mode 100644 index e05836f3ee2d..000000000000 --- a/htmlparser/tests/html/table05b.html +++ /dev/null @@ -1,14 +0,0 @@ -Caption after tr. -
    Nav 4.0: caption is accepted, terminate an empty tr, -
    Xena6.0: ignore misplaced caption tag. - - - taxeA - -
    caption1
    cell 1-1 - cell 1-2 -
    cell 2-1 - cell 2-2 -
    - -last text diff --git a/htmlparser/tests/html/table05c.html b/htmlparser/tests/html/table05c.html deleted file mode 100644 index 1b55afcc3a59..000000000000 --- a/htmlparser/tests/html/table05c.html +++ /dev/null @@ -1,14 +0,0 @@ -Caption between table cell -
    Nav 4.0: caption is accepted, terminate tr, -
    Xena6.0: ignore misplaced caption tag. - - - - textA - -
    cell 1-1
    caption1
    cell 1-2 -
    cell 2-1 - cell2-2 -
    - -last text diff --git a/htmlparser/tests/html/table05d.html b/htmlparser/tests/html/table05d.html deleted file mode 100644 index 3144e70f1468..000000000000 --- a/htmlparser/tests/html/table05d.html +++ /dev/null @@ -1,13 +0,0 @@ -Caption inside cell -
    Nav 4.0: caption is accepted, terminates both td and tr. -
    Xena6.0: ignore misplaced caption tag. - - - textA - -
    cell 1-1
    caption1
    cell 1-2 -
    cell 2-1 - cell 2-2 -
    - -last text diff --git a/htmlparser/tests/html/table05k.html b/htmlparser/tests/html/table05k.html deleted file mode 100644 index d05228d0811b..000000000000 --- a/htmlparser/tests/html/table05k.html +++ /dev/null @@ -1,19 +0,0 @@ -Table in caption works. - - - -
    caption1
    text1 text2 - - - -
    cell 1-1 - cell 1-2 -
    still in - caption -
    -
    cell 2-1-1 - cell 2-1-2 -
    cell 2-2-1 -
    - -last text diff --git a/htmlparser/tests/html/table05l.html b/htmlparser/tests/html/table05l.html deleted file mode 100644 index c7dbd30c4e98..000000000000 --- a/htmlparser/tests/html/table05l.html +++ /dev/null @@ -1,14 +0,0 @@ -Table in caption, and Nav4.0 show blank page if the window is wide. -Table in caption is part of the caption, leave the table body empty. - - - -
    caption1
    text1 text2 - - - -
    cell 1-1 - cell 1-2 -
    cell 2-1 - cell 2-2 -
    - -Nev. 4.0 ignore empty tables. \ No newline at end of file diff --git a/htmlparser/tests/html/table05m.html b/htmlparser/tests/html/table05m.html deleted file mode 100644 index 18c01e22dfa1..000000000000 --- a/htmlparser/tests/html/table05m.html +++ /dev/null @@ -1,5 +0,0 @@ -Start table tag alone is ignored. - - - -last text diff --git a/htmlparser/tests/html/table05n.html b/htmlparser/tests/html/table05n.html deleted file mode 100644 index 83aede590609..000000000000 --- a/htmlparser/tests/html/table05n.html +++ /dev/null @@ -1,11 +0,0 @@ -End table tag terminates caption and table. -
    -
    caption1
    text1 text2 -
    -
    cell1 -
    cell2 -
    - -last text diff --git a/htmlparser/tests/html/table05o.html b/htmlparser/tests/html/table05o.html deleted file mode 100644 index 34fa22e5fd15..000000000000 --- a/htmlparser/tests/html/table05o.html +++ /dev/null @@ -1,6 +0,0 @@ -caption is not shown for empty table. - - -
    caption1
    text1 text2
    - -last text diff --git a/htmlparser/tests/html/table07.html b/htmlparser/tests/html/table07.html deleted file mode 100644 index 64a0edfbe9cc..000000000000 --- a/htmlparser/tests/html/table07.html +++ /dev/null @@ -1,13 +0,0 @@ -text1 - text2 - - - -
    cell 1-1 - cell 1-2 -
    still in - caption -
    -text3 -
    -last text diff --git a/htmlparser/tests/html/table200.html b/htmlparser/tests/html/table200.html deleted file mode 100644 index e43c788bf7b6..000000000000 --- a/htmlparser/tests/html/table200.html +++ /dev/null @@ -1,21 +0,0 @@ -Table in table, but not in tr-td. -
    the outter table is ignored. - - -
    - - - -
    caption of table1.
    first cell in table1 - second cell in table1 -
    third cell in table1 - fourth cell in table1 -
    -

    This is the first cell in table2. -
    - -NOTE, even no line break here! -
    -Caption of outer table is ignored. -Data of outer table is treated as text. \ No newline at end of file diff --git a/htmlparser/tests/html/table201.html b/htmlparser/tests/html/table201.html deleted file mode 100644 index 62e9665d5e80..000000000000 --- a/htmlparser/tests/html/table201.html +++ /dev/null @@ -1,22 +0,0 @@ -Table in table, but not in tr-td. -
    the outter table is ignored. - - - -
    caption of table2.
    - - - -
    caption of table1.
    first cell in table1 - second cell in table1 -
    third cell in table1 - fourth cell in table1 -
    -
    This is the first cell in table2. -
    - -NOTE, even no line break here! -
    -Caption of outer table is ignored. -Data of outer table is treated as text. \ No newline at end of file diff --git a/htmlparser/tests/html/table202.html b/htmlparser/tests/html/table202.html deleted file mode 100644 index 5f5c81afdcf1..000000000000 --- a/htmlparser/tests/html/table202.html +++ /dev/null @@ -1,23 +0,0 @@ -Table in table, after tr, but not td. -
    the outter table and tr are ignored. - - - - -
    caption of table2.
    - - - -
    caption of table1.
    first cell in table1 - second cell in table1 -
    third cell in table1 - fourth cell in table1 -
    - -This is the first cell in table2. - - -NOTE, even no line break here! -
    -Caption of outer table is ignored. -Data of outer table is treated as text. \ No newline at end of file diff --git a/htmlparser/tests/html/table203.html b/htmlparser/tests/html/table203.html deleted file mode 100644 index 8d0075d53efc..000000000000 --- a/htmlparser/tests/html/table203.html +++ /dev/null @@ -1,29 +0,0 @@ -Table nested 3 levels, table3 be the out most. -
    Table 1 is not in table2's tr-td. That makes table2 invalid. -
    Table3 is still valid, and table2's tr and td are used for -table3. - - - - - -
    caption of table3.
    - - - Text after table2, not in tr-td, but in table3's tr-td. -
    caption of table2.
    - - - -
    caption of table1.
    first cell in table1 - second cell in table1 -
    third cell in the table1 - fourth cell in the table1 -
    -
    first cell in table2, but displayed in table 3. -
    - - second cell in table3. but table2's end tag closed table3. - So, this will be displayed out of table3. - - diff --git a/htmlparser/tests/html/table204.html b/htmlparser/tests/html/table204.html deleted file mode 100644 index 0842245cc02f..000000000000 --- a/htmlparser/tests/html/table204.html +++ /dev/null @@ -1,23 +0,0 @@ -nested table missing end td. -
    Nav. 4.0(bug): For inner table td does NOT terminate previous td. -
    Xena 6.0: end td is always optional. - - - - - -
    caption of table1.
    cell 1-1 - cell 1-2 -
    cell 1-3 - cell 1-4 - - - - -
    caption of table2.
    cell1 in table2 - cell2 in table2 -
    cell3 in table2 - cell4 in table2 -
    -
    -
    Last text. \ No newline at end of file diff --git a/htmlparser/tests/html/table205.html b/htmlparser/tests/html/table205.html deleted file mode 100644 index 905659196c33..000000000000 --- a/htmlparser/tests/html/table205.html +++ /dev/null @@ -1,28 +0,0 @@ -Table in table, but not in tr-td. -
    The only thing missing is the end tag of the -first table! -Nav. 4.0: the outter table is ignored. -Xena 6.0: second table start tag is ignored. Contents put in 1 table. - - - - - - - - -
    caption of table1.
    first cell in table1 - second cell in table1 -
    third cell in table1 - fourth cell in table1 -
    - - - -
    caption of table2.
    cell1 in table2 - cell2 in table2 -
    cell3 in table2 - cell4 in table2 -
    - -
    Last text. \ No newline at end of file diff --git a/htmlparser/tests/html/tableall.html b/htmlparser/tests/html/tableall.html deleted file mode 100644 index 0381ca289ca6..000000000000 --- a/htmlparser/tests/html/tableall.html +++ /dev/null @@ -1,81 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    table test cases
    Test case description -
    good syntax
    -
    table01.html <table> <tr> <td> -
    table02.html <table> <tr> <td> <table> nested -
    table03.html <table border align=left> -
    table04.html <table> mixed with list. - -
    table05 The correct position for Caption is after table start tag. - - -
    bad syntax
    -
    *table05a Caption before </table>. 4.0: Accepted, 6.0: ignored. -
    *table05b Caption after tr. 4.0: Accepted, 6.0: ignored. -
    *table05c Caption between table cell. 4.0: Accepted, 6.0: ignored. -
    *table05d Caption inside cell. 4.0: terminates both td and tr. 6.0: Ignored. -
    table05e Multiple caption outside cells. 4.0 takes first as Caption, others as text. -
    *table05f Captions inside cells, 4.0 takes last one, terminate td tr, 6.0 Ignore misplaced tag. -
    table05g Captions outside of table are treated as text. -
    table05h tr terminates caption -
    table05i tr terminates Caption, and close open(font) tags in caption. -
    table05j TD does NOT terminate Caption. TD's contents are used in caption. -
    table05k Table in caption works. -
    table05l Table in caption, table empty. 4.0(bug): shows blank page. -
    table05m <table> alone is ignored. -
    table05n </table> terminates caption and table. -
    table05o caption is not shown for empty table.
    table110.htm <tr> <td> missing <table> -
    table115.htm <table> text1 <tr> test2 <td> test3 -
    table120.htm <table> </table> table totally empty . -
    table125.htm <table> text </table>, no tr, td -
    table130.htm <table> <td> , missing tr -
    table135.htm <table> <tr> <tr> <td> , empty tr. -
    table140.htm <table> <tr> text <tr> <td> , empty tr. -
    table150.htm <table> <tr> <td> <td> , empty td. - -
    table201.html <table> <table> -
    table202.html <table> <tr> <table> -
    table203.html Table nested 3 levels, T1 not in T2's td -
    *table204.htm nested table missing end td. 4.0(bug) inner table need </td> -
    *table205.htm table in table. 4.0: discard outer, 6.0: put into 1 table. -
    - - diff --git a/htmlparser/tests/html/tag001.html b/htmlparser/tests/html/tag001.html deleted file mode 100644 index dd2b5698dca2..000000000000 --- a/htmlparser/tests/html/tag001.html +++ /dev/null @@ -1,3 +0,0 @@ -first line -second line -third line < \ No newline at end of file diff --git a/htmlparser/tests/html/tag002.html b/htmlparser/tests/html/tag002.html deleted file mode 100644 index 5083bee2948f..000000000000 --- a/htmlparser/tests/html/tag002.html +++ /dev/null @@ -1,23 +0,0 @@ -Broken tags are shown, only if no gt sign can be found. - - -aaaaaaaaaaaa - - -bbbbbbbb - -< - -open tag without name, stat with space, treated as text. -
    -KKKKKKKKKK - -< open tag has no tag name, stat with space, treated as text. -
    -ccccccc - -second line -

    Here is next paragraph. -
    -They ignored by 4,0. -Everything between the brakets is consummed. \ No newline at end of file diff --git a/htmlparser/tests/html/tag004.html b/htmlparser/tests/html/tag004.html deleted file mode 100644 index 00b6cc770739..000000000000 --- a/htmlparser/tests/html/tag004.html +++ /dev/null @@ -1,4 +0,0 @@ -first line -
    empty brakets are displayed as text -<> -more text. diff --git a/htmlparser/tests/html/tag005.html b/htmlparser/tests/html/tag005.html deleted file mode 100644 index cb46e4277961..000000000000 --- a/htmlparser/tests/html/tag005.html +++ /dev/null @@ -1,7 +0,0 @@ -first line -
    empty brakets are displayed as text -This -and this eat next br tag. -and another one. -Last text. diff --git a/htmlparser/tests/html/tag006.html b/htmlparser/tests/html/tag006.html deleted file mode 100644 index 36d3dcc0d022..000000000000 --- a/htmlparser/tests/html/tag006.html +++ /dev/null @@ -1,6 +0,0 @@ -first line -
    invalide tags are ignored by 4.0 - - - -Note: no line break here. diff --git a/htmlparser/tests/html/tag007.html b/htmlparser/tests/html/tag007.html deleted file mode 100644 index 596b5484dc5a..000000000000 --- a/htmlparser/tests/html/tag007.html +++ /dev/null @@ -1,5 +0,0 @@ -first line -
    Unmacthed opening tag in next line is ignored by 4.0 - -
    -Note: no line break inserted here. diff --git a/htmlparser/tests/html/tag008.html b/htmlparser/tests/html/tag008.html deleted file mode 100644 index 6b67931e7329..000000000000 --- a/htmlparser/tests/html/tag008.html +++ /dev/null @@ -1,4 +0,0 @@ -first line -
    Unmacthed end tag in next line is ignored by 4.0 -
    -Note: no line break inserted here. diff --git a/htmlparser/tests/html/target01.html b/htmlparser/tests/html/target01.html deleted file mode 100644 index 44c34fdbb299..000000000000 --- a/htmlparser/tests/html/target01.html +++ /dev/null @@ -1,7 +0,0 @@ - -Target outside of quoted HREF. -
    example: -
    -two days - - \ No newline at end of file diff --git a/htmlparser/tests/html/tbody001.html b/htmlparser/tests/html/tbody001.html deleted file mode 100644 index d639a01c6065..000000000000 --- a/htmlparser/tests/html/tbody001.html +++ /dev/null @@ -1,17 +0,0 @@ - - -Welcome to Netscape - - -
    - - - - - -
    - table element with no attributes. The required embedded - elements are used - TBODY-TR-TD. -
    - - diff --git a/htmlparser/tests/html/text001.html b/htmlparser/tests/html/text001.html deleted file mode 100644 index 55c4e5ee38ec..000000000000 --- a/htmlparser/tests/html/text001.html +++ /dev/null @@ -1,8 +0,0 @@ - - -Welcome to Netscape - - -Text-001 - - diff --git a/htmlparser/tests/html/text002.html b/htmlparser/tests/html/text002.html deleted file mode 100644 index 68b070ca308c..000000000000 --- a/htmlparser/tests/html/text002.html +++ /dev/null @@ -1,8 +0,0 @@ - - -Welcome to Netscape - - -Text-002 - - diff --git a/htmlparser/tests/html/text003.html b/htmlparser/tests/html/text003.html deleted file mode 100644 index aa1f7ae9ccc9..000000000000 --- a/htmlparser/tests/html/text003.html +++ /dev/null @@ -1,12 +0,0 @@ - - - -Welcome to Netscape - - -Text-001 - -
    -Text after closing font tag. - - diff --git a/htmlparser/tests/html/thead001.html b/htmlparser/tests/html/thead001.html deleted file mode 100644 index b517ea769be9..000000000000 --- a/htmlparser/tests/html/thead001.html +++ /dev/null @@ -1,25 +0,0 @@ -Normal case for thead, tfoot, tbody - - - - - - - - - - - - - - - - - -
    caption of table2.
    text in thead
    text in tfoot
    first cell in table1 - second cell in table1 -
    third cell in table1 - fourth cell in table1 -
    - -last text. \ No newline at end of file diff --git a/htmlparser/tests/html/tiny.html b/htmlparser/tests/html/tiny.html deleted file mode 100644 index e1013c531daf..000000000000 --- a/htmlparser/tests/html/tiny.html +++ /dev/null @@ -1,5 +0,0 @@ - - -

    My text - - \ No newline at end of file diff --git a/htmlparser/tests/html/title.html b/htmlparser/tests/html/title.html deleted file mode 100644 index 23a697e52aed..000000000000 --- a/htmlparser/tests/html/title.html +++ /dev/null @@ -1,8 +0,0 @@ - - - - </head> - <body> - This should be in the body - </body> -</html> \ No newline at end of file diff --git a/htmlparser/tests/html/title01.html b/htmlparser/tests/html/title01.html deleted file mode 100644 index c64040982448..000000000000 --- a/htmlparser/tests/html/title01.html +++ /dev/null @@ -1,6 +0,0 @@ -<html> -<title> -<!-- still in comment --> - -some text - diff --git a/htmlparser/tests/html/usascii.html b/htmlparser/tests/html/usascii.html deleted file mode 100644 index 834856339d07..000000000000 --- a/htmlparser/tests/html/usascii.html +++ /dev/null @@ -1,26 +0,0 @@ - - - - Juan Gotoh's Visual Workshop - - - - - - - - - - - - -<BODY> - -<P>If your brouser can not display frame, please click -<A HREF="home_E.html">here.</A> it has same contents as frame -version.</P> - -</BODY> - - diff --git a/htmlparser/tests/html/utf8001.html b/htmlparser/tests/html/utf8001.html deleted file mode 100644 index 017a3d7c652e..000000000000 --- a/htmlparser/tests/html/utf8001.html +++ /dev/null @@ -1,9 +0,0 @@ - - - -Welcome to Netscape - - -Text-001 - - diff --git a/htmlparser/tests/html/value001.html b/htmlparser/tests/html/value001.html deleted file mode 100644 index 55c926e7eb74..000000000000 --- a/htmlparser/tests/html/value001.html +++ /dev/null @@ -1,14 +0,0 @@ - - - - -Welcome to Netscape - - - - - - - - - diff --git a/htmlparser/tests/html/xmp005.html b/htmlparser/tests/html/xmp005.html deleted file mode 100644 index ac952f5b8497..000000000000 --- a/htmlparser/tests/html/xmp005.html +++ /dev/null @@ -1,48 +0,0 @@ - - -use a XMP tag for C or JAVA source code. -
    The lt and gt signes are problematic. - - -

    - cc = 123; - // test lt - if( cc < hhh ) { - if(cc<xxx || as > gh) { - //do womthing here; - } - } - if( cc <= iii ) { - if(cc<=yyy) { - //do womthing here; - } - } - // test gt - if( cc > rrr ) { - if(cc>eee) { - //do womthing here; - } - } - if( cc >= www ) { - if(cc>=qqq) { - //do womthing here; - } - } - // what if the variable name is a valid tag name. - if( cc < B && cc > kk ) { - if(cc<B && cc > gg) { - //do womthing here; - } - } - cc = aa<<I; - cc = B>>3; - gt = true; - for(a=0,gt=true; (a>0)&&gt; a++) { - } - a = 0x0004; - lt = 0x0002; - for(; a&lt; ) { - } - - - \ No newline at end of file diff --git a/htmlparser/tests/htmlgen/htmlgen.cpp b/htmlparser/tests/htmlgen/htmlgen.cpp deleted file mode 100644 index de426e7fd462..000000000000 --- a/htmlparser/tests/htmlgen/htmlgen.cpp +++ /dev/null @@ -1,517 +0,0 @@ -/*======================================================== - To Do: - 1. Tag sequences on the command line p..table - 2. Dumping documents into the right place - *========================================================*/ - - -/*======================================================== - Special keywords: - $0..$9: represent command line arguments - @file: the name of the file being writtent - @nextfile: the name of the next file to be written - @import: imports text from another file. 
- *========================================================*/ - - -#include -#include -#include -#include - -#include -#include -#include -#include - - -static char* tagTable[] = { - "A", "ABBR", "ACRONYM", "ADDRESS", "APPLET", "AREA", - "B", "BASE", "BASEFONT", "BDO", "BGSOUND", "BIG", "BLINK", "BLOCKQUOTE", "BODY", "BR", "BUTTON", - "CAPTION", "CENTER", "CITE", "CODE", "COL", "COLGROUP", - "DD", "DEL", "DFN", "DIR", "DIV", "DL", "DT", - "EM", "EMBED", - "FIELDSET", "FONT", "FORM", "FRAME", "FRAMESET", - "H1", "H2", "H3", "H4", "H5", "H6", "HEAD", "HR", "HTML", - "I", "IFRAME", "ILAYER", "IMG", "INPUT", "INS", "ISINDEX", - "KBD", "KEYGEN", - "LABEL", "LAYER", "LEGEND", "LI", "LINK", "LISTING", - "MAP", "MENU", "META", "MULTICOL", - "NOBR", "NOEMBED", "NOFRAMES", "NOLAYER", "NOSCRIPT", - "OBJECT", "OL", "OPTGROUP", "OPTION", - "P", "PARAM", "PLAINTEXT", "PRE", - "Q", - "S","SAMP","SCRIPT","SELECT","SERVER","SMALL","SOUND","SPACER","SPAN","STRIKE","STRONG","STYLE","SUB","SUP", - "TABLE", "TBODY", "TD", "TEXTAREA", "TFOOT", "TH", "THEAD", "TITLE", "TR", "TT", - "U", "UL", - "VAR", - "WBR", - "XMP", - 0 -}; -static char gCWD[1025]; -static char gPrevFile[128]; -static char gThisFile[128]; -static char gNextFile[128]; -static int gFileIndex=0; - - -int findTag(const char* aTagName) { - int low = 0; - int high = 107; - while (low <= high) { - int middle = (low + high) >> 1; - int result = stricmp(aTagName, tagTable[middle]); - if (result == 0) - return middle; - if (result < 0) - high = middle - 1; - else - low = middle + 1; - } - return -1; -} - - -/** - * Call this to find a tag that closely resembles the given tag. - * Note that we match based on the first char. 
- * @update gess12/23/98 - * @param - * @return - */ -int findNearestTag(char* aTag){ - int result=-1; - if(aTag){ - char theChar=toupper(aTag[0]); - int theIndex=-1; - while(tagTable[++theIndex]){ - if(toupper(tagTable[theIndex][0])==theChar) { - return theIndex; - } - } - } - if(toupper(aTag[0])<'A') - result=0; - else result=107; - return result; -} - -char* getNthTagAfter(int aRangeIndex,char* aStartTag){ - int theIndex=findTag(aStartTag); - if(-1==theIndex){ - theIndex=findNearestTag(aStartTag); - } - if(-10){ - if((strchr(aString,',')) || (strchr(aString,'-'))) { - mRanges[mCount]=new char[sLen+1]; - strcpy(mRanges[mCount++],aString); - } - else { - mKeys[mCount]=new char[sLen+1]; - strcpy(mKeys[mCount++],aString); - } - return true; - } - } - return false; - } - - int getCount() {return mCount;} - - char* getMacro(int anIndex) { - if(anIndex> theBuffer; - if(!stricmp(theBuffer,"-F")){ - //copy the filename... - aStream >> theBuffer; - strcpy(mFilename,theBuffer); - readDefs=false; - } -#if 0 - else if(!stricmp(theBuffer,"-D")){ - readDefs=true; - } -#endif - else if(!stricmp(theBuffer,"-O")){ - aStream >> theBuffer; - readDefs=false; - } - else { - if(theBuffer[0]){ - addMacro(theBuffer); - theBuffer[0]=0; - } - } - } - return true; - } - - void buildArgBuffer(char* aBuffer) { - aBuffer[0]=0; - if(mFilename[0]) { - sprintf(aBuffer,"-o %s -f %s ",gThisFile,mFilename); - } - for(int i=0;i>theWord; - char* thePos=strchr(theWord,'@'); - if(thePos){ - strncat(temp,theWord,thePos-theWord); - if(!strnicmp(thePos,"@file",5)){ - strcat(temp,gThisFile); - thePos+=5; - } - else if(!strnicmp(thePos,"@nextfile",9)){ - strcat(temp,gNextFile); - thePos+=9; - } - else if(!strnicmp(thePos,"@prevfile",9)){ - strcat(temp,gPrevFile); - thePos+=9; - } - strcat(temp,thePos); - } - else strcat(temp,theWord); - strcat(temp," "); - } - strcpy(aBuffer,temp); - } -} - -/** - * - * @update gess12/20/98 - * @param - * @return - */ -void expandMacros(char* aBuffer,CMacros& aMacroSet){ - 
char temp[1024]; - int rPos=-1; - int wPos=0; - - if(aBuffer){ - while(aBuffer[++rPos]){ - if('$'==aBuffer[rPos]){ - temp[wPos]=0; - ++rPos; //skip the $... - int theIndex=aBuffer[rPos]-'0'; - char* theMacro=aMacroSet.getMacro(theIndex); - if(theMacro){ - strcat(temp,theMacro); - wPos=strlen(temp); - } - } - else temp[wPos++]=aBuffer[rPos]; - } - temp[wPos]=0; - strcpy(aBuffer,temp); - } -} - -/** - * - * @update gess12/20/98 - * @param - * @return - */ -int processFile(char* aDir,CMacros& aMacroList,fstream& anOutputStream,fstream& anInputStream){ - int result=0; - - if(anInputStream.is_open()){ - bool done=false; - char theBuffer[1024]; - char* p=0; - - while((!done) && (0==result)){ - - anInputStream.getline(theBuffer,sizeof(theBuffer)-1); - if(anInputStream.gcount()){ - - //before doing anything else, expand the macros and keywords... - expandMacros(theBuffer,aMacroList); - expandKeywords(theBuffer,aMacroList); - - //Now process each line: - p=strstr(theBuffer,"@import"); - if(p) { - - //First, see if the line is an htmlgen statement; if so, recurse to read new file... - char theFilename[1024]; - - strcpy(theFilename,"htmlgen -F "); - p+=8; - strcat(theFilename,p); - - //If you're here, we found an htmlgen statement. - // To handle this, we have to: - // 1. strip off the @htmlgen - // 2. grab the filename and collect the args, - // 3. and recurse... - - fstream theInStream(p,ios::in); - result=processFile(aDir,aMacroList,anOutputStream,theInStream); - } - else anOutputStream << theBuffer << endl; - } - else done=true; - } - } - return result; -} - - -/** - * This is where the different combinations of arguments - * gets constructed and passed on for processing. - * Note that this is even called when the args have only 1 value. 
- * @return error code - */ -int iterate(istrstream& aInputArgs){ - int result=0; - - CMacros theArgs; - theArgs.consume(aInputArgs); - - char theFilename[1024]; - - bool done=!theArgs.first(); - while((!done) && (0==result)){ - CMacros theTempArgs(theArgs); - - //theTempArgs.buildArgBuffer(theBuffer); - // istrstream theArgStream(theBuffer); - - sprintf(theFilename,"%s\\%s",gCWD,gThisFile); - fstream theOutStream(theFilename,ios::trunc); - - char* theInFile=theArgs.getFilename(); - fstream theInStream(theInFile,ios::in); - - result=processFile(gCWD,theTempArgs,theOutStream,theInStream); - theArgs.dump(); - done=!theArgs.next(); - } - return result; -} - -/** - * - * @update gess12/20/98 - * @param - * @return - */ -int main(int argc,char* argv[]){ - int result=0; - - gPrevFile[0]=gThisFile[0]=gNextFile[0]; - if(argc>1){ - char theBuffer[1024]; - theBuffer[0]=0; - - for(int i=1;i - - - - - - htmlgen - - -  - - - - -
    Using HTMLGen -
    Version 1.0
    - -

    HTMLGen is a C++ text-processing -utility that lets users quickly generate HTML testcases. While HTMLGen -is not restricted to generating HTML files, the program does understand -the set of known HTML tags. HTMLGen takes user supplied file fragments -and command line arguments as input, and automatically generates a series -of derived HTML files. -

    Here's a quick example: Let's say -you want to create a series of tests to verify that Gecko correctly renders -every tag inside the <BODY> element. To accomplish this, begin with -a small HTML fragment file given below: -
      - - - - -
    <html> -
    <body> -
    <$0>text</$0> -
    </body> -
    </html>
    - -

    Notice that the file looks like a -typical HTML file, except for the tags that read <$0> and </$0>. -These are macros, which are content that will be replaced by arguments -provided on the HTMLGen command line. To finish our example, simply run -HTMLGen with the following command line arguments: -

    HTMLGen -f test1.html a-z
    -This instructs HTMLGen to read the test1.html -file and use it as a template. Next, the tag-range argument "a-z" instructs -HTMLGen to autogenerate a file for each HTML tag between A and Z. HTMLGen -uses this argument as the 0th element of its macro substitution process. -The macros <$0> and </$0> will be replaced in each autogenerated -file with the current value of the $0 argument. So, the first file will -contain <A> where it sees <$0>, the second will use <B> and so -on. -

    The result of running HTMLGen against -the given template with these arguments is that you'll get a new file for -each tag we see in the range between A..Z.  Note that in this example -we're emitting valid HTML, but that is not a requirement. You can use HTMLGen -to generate lots of illegal content for testing as well. -
      -
      - - - - -
    HTMLGen Command Line Arguments
    - -

    The arguments you use for controlling -HTMLGen always have the same form, as shown: -

    HTMLGen -f test1.html [-o outfile name] taglist0 -[,taglist1, ..., taglist9] -

    The first argument is always the -name of the template file to read. -
    The (optional) second argument is -the name of the output file. -
    The arguments from the 3rd onward represent -tag sequences to be used in macro substitution. These can have one of three -forms: 1) a single tag; 2) a list of tags (a,b,c); 3) a range of tags (a-z). -
      -
      - - - - -
    HTMLGen Embedded Text Commands
    - -

    In addition to the command line arguments, -HTMLGen can also respond to special markup found inside your template files. -Commands you can use inside your templates include: -

    @file   -- this gets replaced -with the name of the current output file. -
    @nextfile -- this gets replaced with -the name of the next output file (useful for creating links between -files). -
    @import filename -- this -causes the contents of filename to be inserted into the current -document. -
      -
      - - - - -
    Notes and Cautions
    - -

    -
  • -The first note of caution is that HTMLGen -can produce a large number of test files in very little time. You should -be careful when you specify arguments because the combinatorics add up -to a potentially big document set.
  • -
    - - - diff --git a/htmlparser/tests/logparse/.cvsignore b/htmlparser/tests/logparse/.cvsignore deleted file mode 100644 index 244fcb4ed394..000000000000 --- a/htmlparser/tests/logparse/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile -logparse diff --git a/htmlparser/tests/logparse/logparse.cpp b/htmlparser/tests/logparse/logparse.cpp deleted file mode 100644 index b75e6c423120..000000000000 --- a/htmlparser/tests/logparse/logparse.cpp +++ /dev/null @@ -1,239 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "nsXPCOM.h" -#include "nsIComponentManager.h" -#include "nsParserCIID.h" -#include "nsIAtom.h" -#include "nsIParser.h" -#include "nsILoggingSink.h" -#include "nsIIOService.h" -#include "nsNetCID.h" -#include "nsIURI.h" -#include "CNavDTD.h" -#include - -// Class IID's -static NS_DEFINE_CID(kParserCID, NS_PARSER_CID); -static NS_DEFINE_IID(kLoggingSinkCID, NS_LOGGING_SINK_CID); -static NS_DEFINE_CID(kIOServiceCID, NS_IOSERVICE_CID); - -// Interface IID's - -//---------------------------------------------------------------------- - -static const char* kWorkingDir = "./"; - -nsresult GenerateBaselineFile(const char* aSourceFilename,const char* aBaselineFilename) -{ - if (!aSourceFilename || !aBaselineFilename) - return NS_ERROR_INVALID_ARG; - - nsresult rv; - - // Create a parser - nsCOMPtr parser(do_CreateInstance(kParserCID, &rv)); - if (NS_FAILED(rv)) { - cout << "Unable to create a parser (" << rv << ")" < sink(do_CreateInstance(kLoggingSinkCID, &rv)); - if (NS_FAILED(rv)) { - cout << "Unable to create a sink (" << rv << ")" < localfile (do_CreateInstance(NS_LOCAL_FILE_CONTRACTID, &rv)); - if (NS_FAILED(rv)) - return rv; - - localfile->InitWithNativePath(nsDependentCString(aSourceFilename)); - nsCOMPtr inputURI; - { - nsCOMPtr ioService(do_GetService(kIOServiceCID, &rv)); - if (NS_FAILED(rv)) - return rv; - rv = ioService->NewFileURI(localfile, getter_AddRefs(inputURI)); - if (NS_FAILED(rv)) - return rv; - } - 
localfile->InitWithNativePath(nsDependentCString(aBaselineFilename)); - PRFileDesc *outputfile; - localfile->OpenNSPRFileDesc(0660, PR_WRONLY | PR_CREATE_FILE, &outputfile); - sink->SetOutputStream(outputfile); - - // Parse the document, having the sink write the data to fp - nsIDTD* dtd = nsnull; - NS_NewNavHTMLDTD(&dtd); - parser->RegisterDTD(dtd); - parser->SetContentSink(sink); - - rv = parser->Parse(inputURI, 0, PR_FALSE, 0, eDTDMode_unknown); - - return rv; -} - -//---------------------------------------------------------------------- - -PRBool CompareFiles(const char* aFilename1, const char* aFilename2) { - PRBool result=PR_TRUE; - - fstream theFirstStream(aFilename1,ios::in | ios::nocreate); - fstream theSecondStream(aFilename2,ios::in | ios::nocreate); - - PRBool done=PR_FALSE; - char ch1,ch2; - - while(!done) { - theFirstStream >> ch1; - theSecondStream >> ch2; - if(ch1!=ch2) { - result=PR_FALSE; - break; - } - done=PRBool((theFirstStream.ipfx(1)==0) || (theSecondStream.ipfx(1)==0)); - } - return result; -} - -//---------------------------------------------------------------------- - -void ComputeTempFilename(const char* anIndexFilename, char* aTempFilename) { - if(anIndexFilename) { - strcpy(aTempFilename,anIndexFilename); - char* pos=strrchr(aTempFilename,'\\'); - if(!pos) - pos=strrchr(aTempFilename,'/'); - if(pos) { - (*pos)=0; - strcat(aTempFilename,"/temp.blx"); - return; - } - } - //fall back to our last resort... 
- strcpy(aTempFilename,"c:/windows/temp/temp.blx"); -} - -//---------------------------------------------------------------------- - -static const char* kAppName = "logparse "; -static const char* kOption1 = "Compare baseline file-set"; -static const char* kOption2 = "Generate baseline "; -static const char* kResultMsg[2] = {" failed!"," ok."}; - -void ValidateBaselineFiles(const char* anIndexFilename) { - - fstream theIndexFile(anIndexFilename,ios::in | ios::nocreate); - char theFilename[500]; - char theBaselineFilename[500]; - char theTempFilename[500]; - PRBool done=PR_FALSE; - - ComputeTempFilename(anIndexFilename,theTempFilename); - - while(!done) { - theIndexFile >> theFilename; - theIndexFile >> theBaselineFilename; - if(theFilename[0] && theBaselineFilename[0]) { - if(NS_SUCCEEDED(GenerateBaselineFile(theFilename,theTempFilename))) { - PRBool matches=CompareFiles(theTempFilename,theBaselineFilename); - cout << theFilename << kResultMsg[matches] << endl; - } - } - theFilename[0]=0; - theBaselineFilename[0]=0; - done=PRBool(theIndexFile.ipfx(1)==0); - } - - - // Now it's time to compare our output to the baseline... -// if(!CompareFiles(aBaselineFilename,aBaselineFilename)){ -// cout << "File: \"" << aSourceFilename << "\" does not match baseline." << endl; -// } - -} - - -//---------------------------------------------------------------------- - -int main(int argc, char** argv) -{ - if (argc < 2) { - cout << "Usage: " << kAppName << " [options] [filename]" << endl; - cout << " -c [filelist] " << kOption1 << endl; - cout << " -g [in] [out] " << kOption2 << endl; - return -1; - } - - int result=0; - - nsresult rv = NS_InitXPCOM2(nsnull, nsnull, nsnull); - if (NS_FAILED(rv)) { - printf("NS_InitXPCOM2 failed\n"); - return 1; - } - - if(0==strcmp("-c",argv[1])) { - - if(argc>2) { - cout << kOption1 << "..." << endl; - - //Open the master filelist, and read the filenames. - //Each line contains a source filename and a baseline filename, separated by a space. 
- ValidateBaselineFiles(argv[2]); - } - else { - cout << kAppName << ": Filelist missing for -c option -- nothing to do." << endl; - } - - } - else if(0==strcmp("-g",argv[1])) { - if(argc>3) { - cout << kOption2 << argv[3] << " from " << argv[2] << "..." << endl; - GenerateBaselineFile(argv[2],argv[3]); - } - else { - cout << kAppName << ": Filename(s) missing for -g option -- nothing to do." << endl; - } - } - else { - cout << kAppName << ": Unknown options -- nothing to do." << endl; - } - return result; -} diff --git a/htmlparser/tests/outsinks/.cvsignore b/htmlparser/tests/outsinks/.cvsignore deleted file mode 100644 index 0c5114ed6832..000000000000 --- a/htmlparser/tests/outsinks/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -Makefile -TestOutput -component.reg diff --git a/htmlparser/tests/outsinks/Convert.cpp b/htmlparser/tests/outsinks/Convert.cpp deleted file mode 100644 index 394e716c1825..000000000000 --- a/htmlparser/tests/outsinks/Convert.cpp +++ /dev/null @@ -1,333 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- - * ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code, released - * March 31, 1998. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998-1999 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Akkana Peck. 
- * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include // for isdigit() - -#include "nsXPCOM.h" -#include "nsParserCIID.h" -#include "nsIParser.h" -#include "nsIHTMLContentSink.h" -#include "nsIContentSerializer.h" -#include "nsLayoutCID.h" -#include "nsIHTMLToTextSink.h" -#include "nsIComponentManager.h" -#include "nsIServiceManager.h" -#include "nsIComponentRegistrar.h" -#include "nsReadableUtils.h" -#include "nsCRT.h" - -static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID); -static NS_DEFINE_CID(kParserCID, NS_PARSER_CID); - -int -Compare(nsString& str, nsString& aFileName) -{ - // Open the file in a Unix-centric way, - // until I find out how to use nsFileSpec: - char* filename = ToNewCString(aFileName); - FILE* file = fopen(filename, "r"); - if (!file) - { - fprintf(stderr, "Can't open file %s", filename); - perror(" "); - delete[] filename; - return 2; - } - delete[] filename; - - // Inefficiently read from the file: - nsString inString; - int c; - int index = 0; - int different = 0; - while ((c = getc(file)) != EOF) - { - inString.Append(PRUnichar(c)); - // CVS isn't doing newline comparisons on these files for some reason. 
- // So compensate for possible newline problems in the CVS file: - if (c == '\n' && str[index] == '\r') - ++index; - if (c != str[index++]) - { - //printf("Comparison failed at char %d: generated was %d, file had %d\n", - // index, (int)str[index-1], (int)c); - different = index; - break; - } - } - if (file != stdin) - fclose(file); - - if (!different) - return 0; - else - { - nsAutoString left; - str.Left(left, different); - char* cstr = ToNewUTF8String(left); - printf("Comparison failed at char %d:\n-----\n%s\n-----\n", - different, cstr); - Recycle(cstr); - return 1; - } -} - -//---------------------------------------------------------------------- -// Convert html on stdin to either plaintext or (if toHTML) html -//---------------------------------------------------------------------- -nsresult -HTML2text(nsString& inString, nsString& inType, nsString& outType, - int flags, int wrapCol, nsString& compareAgainst) -{ - nsresult rv = NS_OK; - - nsString outString; - - // Create a parser - nsIParser* parser; - rv = nsComponentManager::CreateInstance(kParserCID, nsnull, - kIParserIID,(void**)&parser); - if (NS_FAILED(rv)) - { - printf("Unable to create a parser : 0x%x\n", rv); - return NS_ERROR_FAILURE; - } - - // Create the appropriate output sink -#ifdef USE_SERIALIZER - nsCAutoString progId(NS_CONTENTSERIALIZER_CONTRACTID_PREFIX); - progId.AppendWithConversion(outType); - - // The syntax used here doesn't work - nsCOMPtr mSerializer; - mSerializer = do_CreateInstance(NS_STATIC_CAST(const char *, progId)); - NS_ENSURE_TRUE(mSerializer, NS_ERROR_NOT_IMPLEMENTED); - - mSerializer->Init(flags, wrapCol); - - nsCOMPtr sink (do_QueryInterface(mSerializer)); - if (!sink) - { - printf("Couldn't get content sink!\n"); - return NS_ERROR_UNEXPECTED; - } -#else /* USE_SERIALIZER */ - nsCOMPtr sink; - if (inType != NS_LITERAL_STRING("text/html") - || outType != NS_LITERAL_STRING("text/plain")) - { - char* in = ToNewCString(inType); - char* out = ToNewCString(outType); - 
printf("Don't know how to convert from %s to %s\n", in, out); - Recycle(in); - Recycle(out); - return NS_ERROR_FAILURE; - } - - sink = do_CreateInstance(NS_PLAINTEXTSINK_CONTRACTID); - NS_ENSURE_TRUE(sink, NS_ERROR_FAILURE); - - nsCOMPtr textSink(do_QueryInterface(sink)); - NS_ENSURE_TRUE(textSink, NS_ERROR_FAILURE); - - textSink->Initialize(&outString, flags, wrapCol); -#endif /* USE_SERIALIZER */ - - parser->SetContentSink(sink); - nsCOMPtr dtd; - if (inType.Equals(NS_LITERAL_STRING("text/html"))) { - static NS_DEFINE_CID(kNavDTDCID, NS_CNAVDTD_CID); - rv=nsComponentManager::CreateInstance(kNavDTDCID,nsnull,NS_GET_IID(nsIDTD),getter_AddRefs(dtd)); - } - else - { - printf("Don't know how to deal with non-html input!\n"); - return NS_ERROR_NOT_IMPLEMENTED; - } - if (NS_FAILED(rv)) - { - printf("Couldn't create new HTML DTD: 0x%x\n", rv); - return rv; - } - - parser->RegisterDTD(dtd); - - rv = parser->Parse(inString, 0, NS_LossyConvertUCS2toASCII(inType), PR_FALSE, PR_TRUE); - if (NS_FAILED(rv)) - { - printf("Parse() failed! 0x%x\n", rv); - return rv; - } - NS_RELEASE(parser); - - if (compareAgainst.Length() > 0) - return Compare(outString, compareAgainst); - - char* charstar = ToNewUTF8String(outString); - printf("Output string is:\n--------------------\n%s--------------------\n", - charstar); - delete[] charstar; - - return NS_OK; -} - -//---------------------------------------------------------------------- - -int main(int argc, char** argv) -{ - nsString inType(NS_LITERAL_STRING("text/html")); - nsString outType(NS_LITERAL_STRING("text/plain")); - int wrapCol = 72; - int flags = 0; - nsString compareAgainst; - - - // Skip over progname arg: - const char* progname = argv[0]; - --argc; ++argv; - - // Process flags - while (argc > 0 && argv[0][0] == '-') - { - switch (argv[0][1]) - { - case 'h': - printf("\ -Usage: %s [-i intype] [-o outtype] [-f flags] [-w wrapcol] [-c comparison_file] infile\n\ -\tIn/out types are mime types (e.g. 
text/html)\n\ -\tcomparison_file is a file against which to compare the output\n\ -\n\ -\tDefaults are -i text/html -o text/plain -f 0 -w 72 [stdin]\n", - progname); - exit(0); - - case 'i': - if (argv[0][2] != '\0') - inType.AssignWithConversion(argv[0]+2); - else { - inType.AssignWithConversion(argv[1]); - --argc; - ++argv; - } - break; - - case 'o': - if (argv[0][2] != '\0') - outType.AssignWithConversion(argv[0]+2); - else { - outType.AssignWithConversion(argv[1]); - --argc; - ++argv; - } - break; - - case 'w': - if (isdigit(argv[0][2])) - wrapCol = atoi(argv[0]+2); - else { - wrapCol = atoi(argv[1]); - --argc; - ++argv; - } - break; - - case 'f': - if (isdigit(argv[0][2])) - flags = atoi(argv[0]+2); - else { - flags = atoi(argv[1]); - --argc; - ++argv; - } - break; - - case 'c': - if (argv[0][2] != '\0') - compareAgainst.AssignWithConversion(argv[0]+2); - else { - compareAgainst.AssignWithConversion(argv[1]); - --argc; - ++argv; - } - break; - } - ++argv; - --argc; - } - - FILE* file = 0; - if (argc > 0) // read from a file - { - // Open the file in a Unix-centric way, - // until I find out how to use nsFileSpec: - file = fopen(argv[0], "r"); - if (!file) - { - fprintf(stderr, "Can't open file %s", argv[0]); - perror(" "); - exit(1); - } - } - else - file = stdin; - - nsresult ret; - { - nsCOMPtr servMan; - NS_InitXPCOM2(getter_AddRefs(servMan), nsnull, nsnull); - nsCOMPtr registrar = do_QueryInterface(servMan); - NS_ASSERTION(registrar, "Null nsIComponentRegistrar"); - registrar->AutoRegister(nsnull); - - // Read in the string: very inefficient, but who cares? 
- nsString inString; - int c; - while ((c = getc(file)) != EOF) - inString.Append(PRUnichar(c)); - - if (file != stdin) - fclose(file); - - ret = HTML2text(inString, inType, outType, flags, wrapCol, compareAgainst); - } // this scopes the nsCOMPtrs - // no nsCOMPtrs are allowed to be alive when you call NS_ShutdownXPCOM - nsresult rv = NS_ShutdownXPCOM( NULL ); - NS_ASSERTION(NS_SUCCEEDED(rv), "NS_ShutdownXPCOM failed"); - return ret; -} diff --git a/htmlparser/tests/outsinks/TestOutSinks.pl b/htmlparser/tests/outsinks/TestOutSinks.pl deleted file mode 100755 index 974fc72b3330..000000000000 --- a/htmlparser/tests/outsinks/TestOutSinks.pl +++ /dev/null @@ -1,115 +0,0 @@ -#! /usr/bin/perl - -# ***** BEGIN LICENSE BLOCK ***** -# Version: MPL 1.1/GPL 2.0/LGPL 2.1 -# -# The contents of this file are subject to the Mozilla Public License Version -# 1.1 (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# http://www.mozilla.org/MPL/ -# -# Software distributed under the License is distributed on an "AS IS" basis, -# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License -# for the specific language governing rights and limitations under the -# License. -# -# The Original Code is Mozilla Communicator client code, released -# March 31, 1998. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998-1999 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Akkana Peck. -# -# Alternatively, the contents of this file may be used under the terms of -# either of the GNU General Public License Version 2 or later (the "GPL"), -# or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), -# in which case the provisions of the GPL or the LGPL are applicable instead -# of those above. 
If you wish to allow use of your version of this file only -# under the terms of either the GPL or the LGPL, and not to allow others to -# use your version of this file under the terms of the MPL, indicate your -# decision by deleting the provisions above and replace them with the notice -# and other provisions required by the GPL or the LGPL. If you do not delete -# the provisions above, a recipient may use your version of this file under -# the terms of any one of the MPL, the GPL or the LGPL. -# -# ***** END LICENSE BLOCK ***** - -# -# This is a collection of test files to guard against regressions -# in the Mozilla output system. -# Documentation on the tests is available at: -# http://www.mozilla.org/editor/serializer-tests.html -# - -# Make sure . is in the path, so we can load the other shared libraries -$ENV{LD_LIBRARY_PATH} .= ":."; - -$errmsg = ""; - -# print "Testing simple html to html ...\n"; -# $status = system("./TestOutput -i text/html -o text/html -f 0 -c OutTestData/simple.html OutTestData/simple.html"); -# if ($status != 0) { -# print "Simple html to html failed.\n"; -# $errmsg = "$errmsg simple.html"; -# } - -print "Testing simple copy case ...\n"; -$status = system("./TestOutput -i text/html -o text/plain -f 0 -w 0 -c OutTestData/simplecopy.out OutTestData/simple.html"); -if ($status != 0) { - print "Simple copy test failed.\n"; - $errmsg = "$errmsg simplecopy.out"; -} - -print "Testing simple html to plaintext formatting ...\n"; -$status = system("./TestOutput -i text/html -o text/plain -f 34 -w 70 -c OutTestData/simplefmt.out OutTestData/simple.html"); -if ($status != 0) { - print("Simple formatting test failed.\n"); - $errmsg = "$errmsg simplefmt.out "; -} - -print "Testing non-wrapped plaintext in preformatted mode ...\n"; -$status = system("./TestOutput -i text/html -o text/plain -f 16 -c OutTestData/plainnowrap.out OutTestData/plain.html"); -if ($status != 0) { - print "Non-wrapped plaintext test failed.\n"; - $errmsg = "$errmsg 
plainnowrap.out"; -} - -# print "Testing wrapped and formatted plaintext ...\n"; -# $status = system("TestOutput -i text/html -o text/plain -f 32 -c OutTestData/plainwrap.out OutTestData/plain.html"); -# if ($status != 0) { -# print "Wrapped plaintext test failed.\n"; -# $errmsg = "$errmsg plainwrap.out"; -# } - -print "Testing mail quoting ...\n"; -$status = system("./TestOutput -i text/html -o text/plain -f 2 -w 50 -c OutTestData/mailquote.out OutTestData/mailquote.html"); -if ($status != 0) { - print "Mail quoting test failed.\n"; - $errmsg = "$errmsg mailquote.out"; -} - -print "Testing format=flowed output ...\n"; -$status = system("./TestOutput -i text/html -o text/plain -f 66 -w 50 -c OutTestData/simplemail.out OutTestData/simplemail.html"); -if ($status != 0) { - print "Format=flowed test failed.\n"; - $errmsg = "$errmsg simplemail.out"; -} - -print "Testing HTML Table to Text ...\n"; -$status = system("./TestOutput -i text/html -o text/plain -f 2 -c OutTestData/htmltable.out OutTestData/htmltable.html"); -if ($status != 0) { - print "HTML Table to Plain text failed.\n"; - $errmsg = "$errmsg htmltable.out"; -} - -if ($errmsg ne "") { - print "\nERROR: DOM SERIALIZER TEST FAILED: $errmsg\n"; - print "See http://www.mozilla.org/editor/serializer-tests.html for help.\n"; - exit 1 -} else { - print "DOM SERIALIZER TESTS SUCCEEDED\n"; -} diff --git a/htmlparser/tests/outsinks/doctype.xif b/htmlparser/tests/outsinks/doctype.xif deleted file mode 100644 index 0a691a75e034..000000000000 --- a/htmlparser/tests/outsinks/doctype.xif +++ /dev/null @@ -1,17 +0,0 @@ - - - -
    - - - - - -DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN" - - -Here is some content inside a doctype - - - -
    diff --git a/htmlparser/tests/outsinks/entityxif.out b/htmlparser/tests/outsinks/entityxif.out deleted file mode 100644 index b46ce23cffa8..000000000000 --- a/htmlparser/tests/outsinks/entityxif.out +++ /dev/null @@ -1,2 +0,0 @@ - -http://www.hotbot.com/?MT=Search+Engines&SM=MC&DV=0&LG=any&DC=10&DE=2&BT=H&Search.x=31&Search.y=7r diff --git a/htmlparser/tests/outsinks/entityxif.xif b/htmlparser/tests/outsinks/entityxif.xif deleted file mode 100644 index ee8843e01a7c..000000000000 --- a/htmlparser/tests/outsinks/entityxif.xif +++ /dev/null @@ -1,52 +0,0 @@ - - - -
    - - - - - - - - -- The contents of this file are subject to the Netscape Public - -- License Version 1.1 (the "License"); you may not use this file - -- except in compliance with the License. You may obtain a copy of - -- the License at http://www.mozilla.org/NPL/ - -- - -- Software distributed under the License is distributed on an "AS - -- IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or - -- implied. See the License for the specific language governing - -- rights and limitations under the License. - -- - -- The Original Code is Mozilla Communicator client code, released - -- March 31, 1998. - -- - -- The Initial Developer of the Original Code is Netscape - -- Communications Corporation. Portions created by Netscape are - -- Copyright (C) 1998-1999 Netscape Communications Corporation. All - -- Rights Reserved. - -- - -- Contributor(s): - - - - - - - -http://www.hotbot.com/?MT=Search+Engines -SM=MC -DV=0 -LG=any -DC=10 -DE=2 -BT=H -Search.x=31 -Search.y=7r - - - - - -
    diff --git a/htmlparser/tests/outsinks/htmltable.html b/htmlparser/tests/outsinks/htmltable.html deleted file mode 100644 index ad2586bb0038..000000000000 --- a/htmlparser/tests/outsinks/htmltable.html +++ /dev/null @@ -1,16 +0,0 @@ - - -HTML To Text Test Page - - - -Below is a table.
    - - - - - -
    Row 1 Col 1Row 1 Col 2Row 1 Col 3
    Row 2 Col 1 Row 2 Col 2 Row 2 Col 3
    Row 3 Col 1Row 3 Col 2Row 3 Col 3
    -Here is after table. - - diff --git a/htmlparser/tests/outsinks/htmltable.out b/htmlparser/tests/outsinks/htmltable.out deleted file mode 100644 index 34b5f776a15a..000000000000 --- a/htmlparser/tests/outsinks/htmltable.out +++ /dev/null @@ -1,6 +0,0 @@ -Below is a table. -Row 1 Col 1 Row 1 Col 2 Row 1 Col 3 -Row 2 Col 1 Row 2 Col 2 Row 2 Col 3 -Row 3 Col 1 Row 3 Col 2 Row 3 Col 3 - -Here is after table. diff --git a/htmlparser/tests/outsinks/mailquote.html b/htmlparser/tests/outsinks/mailquote.html deleted file mode 100644 index 8ffa5bfe3a0b..000000000000 --- a/htmlparser/tests/outsinks/mailquote.html +++ /dev/null @@ -1,60 +0,0 @@ - - -Mail Quoting Test - - - - -This page is a test of mail quoting. -

    - -I hope you will enjoy these quotes from Hamlet, introduced by a fairly long line to see how quotations get wrapped: -

    - -

    -
    -(These have br tags after them.
    -To be, or not to be, that is the question
    -Whether 'tis nobler in the mind to suffer
    -The slings and fortunes of outrageous fortune
    -Or to take arms against a sea of troubles, -And by opposing end them.
    -
    - -

    -Oh, what a mind is here o'erthrown.
    - -

    -(The next line does not end with a br tag.).
    -Oh, what a rogue and peasant slave am I. -
    - -(Neither does the next line:)
    -The observed of all observers, quite, quite down! - -
    - -

    -Now we're outside all blockquotes. - - diff --git a/htmlparser/tests/outsinks/mailquote.out b/htmlparser/tests/outsinks/mailquote.out deleted file mode 100644 index 7336ee8e0832..000000000000 --- a/htmlparser/tests/outsinks/mailquote.out +++ /dev/null @@ -1,23 +0,0 @@ -This page is a test of mail quoting. - -I hope you will enjoy these quotes from /Hamlet/, -introduced by a fairly long line to see how -quotations get wrapped: - ->> /(These have *br* tags after them./ ->> To be, or not to be, that is the question ->> Whether 'tis nobler in the mind to suffer ->> The slings and fortunes of outrageous fortune ->> Or to take arms against a sea of troubles, And ->> by opposing end them. -> -> Oh, what a mind is here o'erthrown. -> ->> /(The next line does not end with a *br* tag.)./ ->> Oh, what a rogue and peasant slave am I. -> -> /(Neither does the next line:)/ -> The observed of all observers, quite, quite down! - -Now we're outside all blockquotes. - diff --git a/htmlparser/tests/outsinks/plain.html b/htmlparser/tests/outsinks/plain.html deleted file mode 100644 index cd562b10c833..000000000000 --- a/htmlparser/tests/outsinks/plain.html +++ /dev/null @@ -1,37 +0,0 @@ - -Ender Plain Text Test Page - - - -80 char width (for reference only): ----------|---------|---------|---------|---------|---------|---------|---------| -Here is a link to mozilla.org. -Here is some underlined and boldenedified text. -This is a test to make sure the output converters pick up the moz-pre-wrap style. They don't necessarily have to pick up the exact wrap setting. - -- This should be tested with wrapping on. -- This should be tested with wrapping off. - -This is the end. 
- - diff --git a/htmlparser/tests/outsinks/plainnowrap.out b/htmlparser/tests/outsinks/plainnowrap.out deleted file mode 100644 index fe303d9de06d..000000000000 --- a/htmlparser/tests/outsinks/plainnowrap.out +++ /dev/null @@ -1,12 +0,0 @@ - - -80 char width (for reference only): ----------|---------|---------|---------|---------|---------|---------|---------| -Here is a link to mozilla.org. -Here is some underlined and boldenedified text. -This is a test to make sure the output converters pick up the moz-pre-wrap style. They don't necessarily have to pick up the exact wrap setting. - -- This should be tested with wrapping on. -- This should be tested with wrapping off. - -This is the end. diff --git a/htmlparser/tests/outsinks/plainwrap.html b/htmlparser/tests/outsinks/plainwrap.html deleted file mode 100644 index 2bd6b19920a3..000000000000 --- a/htmlparser/tests/outsinks/plainwrap.html +++ /dev/null @@ -1,39 +0,0 @@ - - -Ender Plain Text Test Page - - - - -80 char width (for reference only) ----------|---------|---------|---------|---------|---------|---------|---------| -Welcome to the Gecko Plaintext Editor. -This message has the wrapping set to 72 columns using a style sheet. -Typed text will wrap to the current wrap setting. You can view or set the wrap settings by typing various characters, as such: -- alt-C: print the current wrap column setting. -- alt-]: increase the wrap setting by 5 -- alt-[: decrease the wrap setting by 5 -- ctrl-\: wrap to window width (wrapcolumn = -1) -- alt-\: turn off wrapping (wrapcolumn = 0) - - diff --git a/htmlparser/tests/outsinks/plainwrap.out b/htmlparser/tests/outsinks/plainwrap.out deleted file mode 100644 index a42b5e7fe2d8..000000000000 --- a/htmlparser/tests/outsinks/plainwrap.out +++ /dev/null @@ -1,9 +0,0 @@ -80 char width (for reference only): ----------|---------|---------|---------|---------|---------|---------|---------| -Here is a link to mozilla.org. Here is some -underlined and boldenedified text. 
This is a test -to make sure the output converters pick up the -moz-pre-wrap style. They don't necessarily have to -pick up the exact wrap setting. - This should be -tested with wrapping on. - This should be tested -with wrapping off. This is the end. diff --git a/htmlparser/tests/outsinks/quotes.html b/htmlparser/tests/outsinks/quotes.html deleted file mode 100644 index 8ecf59750998..000000000000 --- a/htmlparser/tests/outsinks/quotes.html +++ /dev/null @@ -1,5 +0,0 @@ - - -This is a page with "double quotes" and <angle brackets>. - - diff --git a/htmlparser/tests/outsinks/simple.html b/htmlparser/tests/outsinks/simple.html deleted file mode 100644 index 4576bfeca7bf..000000000000 --- a/htmlparser/tests/outsinks/simple.html +++ /dev/null @@ -1,63 +0,0 @@ - - - -Simple html page - -

    Simple html page

    - -Here is a link to the mozilla.org page. -Here is some underlined and boldenedified text -plus some <angle bracket entities>. - -

    -Here is a line ending with a space -followed by a line break. -Plaintext output should contain only one space (and no line breaks) between "space" and "followed". -

    - -

    -Here is a list: -

    - -
      -
    • An item
    • -
    • A nested ordered list:
    • -
      1. -
      2. item one
      3. -
      4. item two
      5. -
      -
    • -
    • last item
    • -
    - -

    Here is a paragraph after the list.

    - -Here is a blockquote: -
    -The quick brown fox -jumped over -the lazy dog -
    - - - diff --git a/htmlparser/tests/outsinks/simplecopy.out b/htmlparser/tests/outsinks/simplecopy.out deleted file mode 100644 index 0866b975c729..000000000000 --- a/htmlparser/tests/outsinks/simplecopy.out +++ /dev/null @@ -1,15 +0,0 @@ -Simple html page -Here is a link to the mozilla.org page. Here is some underlined and boldenedified text plus some . - -Here is a line ending with a space followed by a line break. Plaintext output should contain only one space (and no line breaks) between "space" and "followed". - -Here is a list: - - * An item - * A nested ordered list: - * - 1. item one - 2. item two - * last item - -Here is a paragraph after the list. diff --git a/htmlparser/tests/outsinks/simplefmt.out b/htmlparser/tests/outsinks/simplefmt.out deleted file mode 100644 index 1683b0117597..000000000000 --- a/htmlparser/tests/outsinks/simplefmt.out +++ /dev/null @@ -1,26 +0,0 @@ - - Simple html page - -Here is a link to the mozilla.org page. Here -is some _underlined and *bold*ened_ified text plus some . - -Here is a line ending with a space followed by a line break. Plaintext -output should contain only one space (and no line breaks) between -"space" and "followed". - -Here is a /list/: - - * An item - * A nested ordered list: - * - 1. item one - 2. item two - * last item - -Here is a paragraph after the list. - -Here is a blockquote: - - The quick brown fox jumped over the lazy dog - diff --git a/htmlparser/tests/outsinks/simplemail.html b/htmlparser/tests/outsinks/simplemail.html deleted file mode 100644 index 99e57080aee6..000000000000 --- a/htmlparser/tests/outsinks/simplemail.html +++ /dev/null @@ -1,18 +0,0 @@ - -This is a mail - with a couple of long lines and then a sig. This is used as test of the - format=flowed output in the nsHTMLToTXTSinkstream. If this test fails and - none else, it's likely the spaces at the ends of the lines that are missing. 
- They aren't easily seen without looking at the data in an editor and checking - where the end of line is.
    -
    -Testing Space stuffing
    -From shouldn't start a line.
    -  Nor a space.
    -> Neither should a greater-than symbol.
    ---
    -Above is NOT a sig delimiter since it's missing the trailing SPACE:
    -
    ---
    -Above is a sig delimiter. I hope.
    -A sig (wow!).  diff --git a/htmlparser/tests/outsinks/simplemail.out b/htmlparser/tests/outsinks/simplemail.out deleted file mode 100644 index 0de2867e6e27..000000000000 --- a/htmlparser/tests/outsinks/simplemail.out +++ /dev/null @@ -1,20 +0,0 @@ -This is a mail with a couple of long lines and -then a sig. This is used as test of the -format=flowed output in the nsHTMLToTXTSinkstream. -If this test fails and none else, it's likely the -spaces at the ends of the lines that are missing. -They aren't easily seen without looking at the -data in an editor and checking where the end of -line is. - -Testing Space stuffing - From shouldn't start a line. - Nor a space. - > Neither should a greater-than symbol. --- -Above is NOT a sig delimiter since it's missing -the trailing SPACE: - --- -Above is a sig delimiter. I hope. -A sig (wow!). diff --git a/htmlparser/tests/outsinks/xifdthtml.out b/htmlparser/tests/outsinks/xifdthtml.out deleted file mode 100644 index 31fa952fb891..000000000000 --- a/htmlparser/tests/outsinks/xifdthtml.out +++ /dev/null @@ -1,3 +0,0 @@ - -Here is some content inside a doctype - diff --git a/htmlparser/tests/outsinks/xifdtplain.out b/htmlparser/tests/outsinks/xifdtplain.out deleted file mode 100644 index 8b2ae30d607d..000000000000 --- a/htmlparser/tests/outsinks/xifdtplain.out +++ /dev/null @@ -1 +0,0 @@ -Here is some content inside a doctype diff --git a/htmlparser/tests/outsinks/xifstuff.out b/htmlparser/tests/outsinks/xifstuff.out deleted file mode 100644 index 0b52b27b3345..000000000000 --- a/htmlparser/tests/outsinks/xifstuff.out +++ /dev/null @@ -1,41 +0,0 @@ - - - - -XIF Test Page - - - - -

    Here's the deal...

    - -

    This is a good place to add in html to aid in testing features -under development. -<a href="http://www.mozilla.org"> -It's also a great place to not use latin. - - -

    - diff --git a/htmlparser/tests/outsinks/xifstuff.xif b/htmlparser/tests/outsinks/xifstuff.xif deleted file mode 100644 index b3169e13a3f4..000000000000 --- a/htmlparser/tests/outsinks/xifstuff.xif +++ /dev/null @@ -1,95 +0,0 @@ - - - -
    - - - - - - - - - - - - -- The contents of this file are subject to the Netscape Public - -- License Version 1.1 (the "License"); you may not use this file - -- except in compliance with the License. You may obtain a copy of - -- the License at http://www.mozilla.org/NPL/ - -- - -- Software distributed under the License is distributed on an "AS - -- IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or - -- implied. See the License for the specific language governing - -- rights and limitations under the License. - -- - -- The Original Code is Mozilla Communicator client code, released - -- March 31, 1998. - -- - -- The Initial Developer of the Original Code is Netscape - -- Communications Corporation. Portions created by Netscape are - -- Copyright (C) 1998-1999 Netscape Communications Corporation. All - -- Rights Reserved. - -- - -- Contributor(s): - - - - - - -XIF Test Page - - - - - - - - - - - - - - - - - - - -Here's the deal... - - - - - - -This is a good place to add in - -html - - to aid in testing features -under development. - - -a href="http://www.mozilla.org" - - - -It's also a great place to not use latin. - - - This is a comment; - Here is more of the comment. - - - - - - - - - - -
    diff --git a/htmlparser/tools/genentities.pl b/htmlparser/tools/genentities.pl deleted file mode 100644 index 7df66ac7b015..000000000000 --- a/htmlparser/tools/genentities.pl +++ /dev/null @@ -1,430 +0,0 @@ -#! /usr/local/bin/perl - -# ***** BEGIN LICENSE BLOCK ***** -# Version: MPL 1.1/GPL 2.0/LGPL 2.1 -# -# The contents of this file are subject to the Mozilla Public License Version -# 1.1 (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# http://www.mozilla.org/MPL/ -# -# Software distributed under the License is distributed on an "AS IS" basis, -# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License -# for the specific language governing rights and limitations under the -# License. -# -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# -# Alternatively, the contents of this file may be used under the terms of -# either of the GNU General Public License Version 2 or later (the "GPL"), -# or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), -# in which case the provisions of the GPL or the LGPL are applicable instead -# of those above. If you wish to allow use of your version of this file only -# under the terms of either the GPL or the LGPL, and not to allow others to -# use your version of this file under the terms of the MPL, indicate your -# decision by deleting the provisions above and replace them with the notice -# and other provisions required by the GPL or the LGPL. If you do not delete -# the provisions above, a recipient may use your version of this file under -# the terms of any one of the MPL, the GPL or the LGPL. 
-# -# ***** END LICENSE BLOCK ***** - -###################################################################### - -# ISO 8859-1 entities. -# See the HTML4.0 spec for this list in it's DTD form -$i = 0; -$entity[$i++] = "nbsp"; $value{"nbsp"} = "160"; -$entity[$i++] = "iexcl"; $value{"iexcl"} = "161"; -$entity[$i++] = "cent"; $value{"cent"} = "162"; -$entity[$i++] = "pound"; $value{"pound"} = "163"; -$entity[$i++] = "curren"; $value{"curren"} = "164"; -$entity[$i++] = "yen"; $value{"yen"} = "165"; -$entity[$i++] = "brvbar"; $value{"brvbar"} = "166"; -$entity[$i++] = "sect"; $value{"sect"} = "167"; -$entity[$i++] = "uml"; $value{"uml"} = "168"; -$entity[$i++] = "copy"; $value{"copy"} = "169"; -$entity[$i++] = "ordf"; $value{"ordf"} = "170"; -$entity[$i++] = "laquo"; $value{"laquo"} = "171"; -$entity[$i++] = "not"; $value{"not"} = "172"; -$entity[$i++] = "shy"; $value{"shy"} = "173"; -$entity[$i++] = "reg"; $value{"reg"} = "174"; -$entity[$i++] = "macr"; $value{"macr"} = "175"; -$entity[$i++] = "deg"; $value{"deg"} = "176"; -$entity[$i++] = "plusmn"; $value{"plusmn"} = "177"; -$entity[$i++] = "sup2"; $value{"sup2"} = "178"; -$entity[$i++] = "sup3"; $value{"sup3"} = "179"; -$entity[$i++] = "acute"; $value{"acute"} = "180"; -$entity[$i++] = "micro"; $value{"micro"} = "181"; -$entity[$i++] = "para"; $value{"para"} = "182"; -$entity[$i++] = "middot"; $value{"middot"} = "183"; -$entity[$i++] = "cedil"; $value{"cedil"} = "184"; -$entity[$i++] = "sup1"; $value{"sup1"} = "185"; -$entity[$i++] = "ordm"; $value{"ordm"} = "186"; -$entity[$i++] = "raquo"; $value{"raquo"} = "187"; -$entity[$i++] = "frac14"; $value{"frac14"} = "188"; -$entity[$i++] = "frac12"; $value{"frac12"} = "189"; -$entity[$i++] = "frac34"; $value{"frac34"} = "190"; -$entity[$i++] = "iquest"; $value{"iquest"} = "191"; -$entity[$i++] = "Agrave"; $value{"Agrave"} = "192"; -$entity[$i++] = "Aacute"; $value{"Aacute"} = "193"; -$entity[$i++] = "Acirc"; $value{"Acirc"} = "194"; -$entity[$i++] = "Atilde"; 
$value{"Atilde"} = "195"; -$entity[$i++] = "Auml"; $value{"Auml"} = "196"; -$entity[$i++] = "Aring"; $value{"Aring"} = "197"; -$entity[$i++] = "AElig"; $value{"AElig"} = "198"; -$entity[$i++] = "Ccedil"; $value{"Ccedil"} = "199"; -$entity[$i++] = "Egrave"; $value{"Egrave"} = "200"; -$entity[$i++] = "Eacute"; $value{"Eacute"} = "201"; -$entity[$i++] = "Ecirc"; $value{"Ecirc"} = "202"; -$entity[$i++] = "Euml"; $value{"Euml"} = "203"; -$entity[$i++] = "Igrave"; $value{"Igrave"} = "204"; -$entity[$i++] = "Iacute"; $value{"Iacute"} = "205"; -$entity[$i++] = "Icirc"; $value{"Icirc"} = "206"; -$entity[$i++] = "Iuml"; $value{"Iuml"} = "207"; -$entity[$i++] = "ETH"; $value{"ETH"} = "208"; -$entity[$i++] = "Ntilde"; $value{"Ntilde"} = "209"; -$entity[$i++] = "Ograve"; $value{"Ograve"} = "210"; -$entity[$i++] = "Oacute"; $value{"Oacute"} = "211"; -$entity[$i++] = "Ocirc"; $value{"Ocirc"} = "212"; -$entity[$i++] = "Otilde"; $value{"Otilde"} = "213"; -$entity[$i++] = "Ouml"; $value{"Ouml"} = "214"; -$entity[$i++] = "times"; $value{"times"} = "215"; -$entity[$i++] = "Oslash"; $value{"Oslash"} = "216"; -$entity[$i++] = "Ugrave"; $value{"Ugrave"} = "217"; -$entity[$i++] = "Uacute"; $value{"Uacute"} = "218"; -$entity[$i++] = "Ucirc"; $value{"Ucirc"} = "219"; -$entity[$i++] = "Uuml"; $value{"Uuml"} = "220"; -$entity[$i++] = "Yacute"; $value{"Yacute"} = "221"; -$entity[$i++] = "THORN"; $value{"THORN"} = "222"; -$entity[$i++] = "szlig"; $value{"szlig"} = "223"; -$entity[$i++] = "agrave"; $value{"agrave"} = "224"; -$entity[$i++] = "aacute"; $value{"aacute"} = "225"; -$entity[$i++] = "acirc"; $value{"acirc"} = "226"; -$entity[$i++] = "atilde"; $value{"atilde"} = "227"; -$entity[$i++] = "auml"; $value{"auml"} = "228"; -$entity[$i++] = "aring"; $value{"aring"} = "229"; -$entity[$i++] = "aelig"; $value{"aelig"} = "230"; -$entity[$i++] = "ccedil"; $value{"ccedil"} = "231"; -$entity[$i++] = "egrave"; $value{"egrave"} = "232"; -$entity[$i++] = "eacute"; $value{"eacute"} = "233"; 
-$entity[$i++] = "ecirc"; $value{"ecirc"} = "234"; -$entity[$i++] = "euml"; $value{"euml"} = "235"; -$entity[$i++] = "igrave"; $value{"igrave"} = "236"; -$entity[$i++] = "iacute"; $value{"iacute"} = "237"; -$entity[$i++] = "icirc"; $value{"icirc"} = "238"; -$entity[$i++] = "iuml"; $value{"iuml"} = "239"; -$entity[$i++] = "eth"; $value{"eth"} = "240"; -$entity[$i++] = "ntilde"; $value{"ntilde"} = "241"; -$entity[$i++] = "ograve"; $value{"ograve"} = "242"; -$entity[$i++] = "oacute"; $value{"oacute"} = "243"; -$entity[$i++] = "ocirc"; $value{"ocirc"} = "244"; -$entity[$i++] = "otilde"; $value{"otilde"} = "245"; -$entity[$i++] = "ouml"; $value{"ouml"} = "246"; -$entity[$i++] = "divide"; $value{"divide"} = "247"; -$entity[$i++] = "oslash"; $value{"oslash"} = "248"; -$entity[$i++] = "ugrave"; $value{"ugrave"} = "249"; -$entity[$i++] = "uacute"; $value{"uacute"} = "250"; -$entity[$i++] = "ucirc"; $value{"ucirc"} = "251"; -$entity[$i++] = "uuml"; $value{"uuml"} = "252"; -$entity[$i++] = "yacute"; $value{"yacute"} = "253"; -$entity[$i++] = "thorn"; $value{"thorn"} = "254"; -$entity[$i++] = "yuml"; $value{"yuml"} = "255"; - -# Symbols, mathematical symbols and Greek letters -# See the HTML4.0 spec for this list in it's DTD form -$entity[$i++] = "fnof"; $value{"fnof"} = "402"; -$entity[$i++] = "Alpha"; $value{"Alpha"} = "913"; -$entity[$i++] = "Beta"; $value{"Beta"} = "914"; -$entity[$i++] = "Gamma"; $value{"Gamma"} = "915"; -$entity[$i++] = "Delta"; $value{"Delta"} = "916"; -$entity[$i++] = "Epsilon"; $value{"Epsilon"} = "917"; -$entity[$i++] = "Zeta"; $value{"Zeta"} = "918"; -$entity[$i++] = "Eta"; $value{"Eta"} = "919"; -$entity[$i++] = "Theta"; $value{"Theta"} = "920"; -$entity[$i++] = "Iota"; $value{"Iota"} = "921"; -$entity[$i++] = "Kappa"; $value{"Kappa"} = "922"; -$entity[$i++] = "Lambda"; $value{"Lambda"} = "923"; -$entity[$i++] = "Mu"; $value{"Mu"} = "924"; -$entity[$i++] = "Nu"; $value{"Nu"} = "925"; -$entity[$i++] = "Xi"; $value{"Xi"} = "926"; -$entity[$i++] = 
"Omicron"; $value{"Omicron"} = "927"; -$entity[$i++] = "Pi"; $value{"Pi"} = "928"; -$entity[$i++] = "Rho"; $value{"Rho"} = "929"; -$entity[$i++] = "Sigma"; $value{"Sigma"} = "931"; -$entity[$i++] = "Tau"; $value{"Tau"} = "932"; -$entity[$i++] = "Upsilon"; $value{"Upsilon"} = "933"; -$entity[$i++] = "Phi"; $value{"Phi"} = "934"; -$entity[$i++] = "Chi"; $value{"Chi"} = "935"; -$entity[$i++] = "Psi"; $value{"Psi"} = "936"; -$entity[$i++] = "Omega"; $value{"Omega"} = "937"; -$entity[$i++] = "alpha"; $value{"alpha"} = "945"; -$entity[$i++] = "beta"; $value{"beta"} = "946"; -$entity[$i++] = "gamma"; $value{"gamma"} = "947"; -$entity[$i++] = "delta"; $value{"delta"} = "948"; -$entity[$i++] = "epsilon"; $value{"epsilon"} = "949"; -$entity[$i++] = "zeta"; $value{"zeta"} = "950"; -$entity[$i++] = "eta"; $value{"eta"} = "951"; -$entity[$i++] = "theta"; $value{"theta"} = "952"; -$entity[$i++] = "iota"; $value{"iota"} = "953"; -$entity[$i++] = "kappa"; $value{"kappa"} = "954"; -$entity[$i++] = "lambda"; $value{"lambda"} = "955"; -$entity[$i++] = "mu"; $value{"mu"} = "956"; -$entity[$i++] = "nu"; $value{"nu"} = "957"; -$entity[$i++] = "xi"; $value{"xi"} = "958"; -$entity[$i++] = "omicron"; $value{"omicron"} = "959"; -$entity[$i++] = "pi"; $value{"pi"} = "960"; -$entity[$i++] = "rho"; $value{"rho"} = "961"; -$entity[$i++] = "sigmaf"; $value{"sigmaf"} = "962"; -$entity[$i++] = "sigma"; $value{"sigma"} = "963"; -$entity[$i++] = "tau"; $value{"tau"} = "964"; -$entity[$i++] = "upsilon"; $value{"upsilon"} = "965"; -$entity[$i++] = "phi"; $value{"phi"} = "966"; -$entity[$i++] = "chi"; $value{"chi"} = "967"; -$entity[$i++] = "psi"; $value{"psi"} = "968"; -$entity[$i++] = "omega"; $value{"omega"} = "969"; -$entity[$i++] = "thetasym"; $value{"thetasym"} = "977"; -$entity[$i++] = "upsih"; $value{"upsih"} = "978"; -$entity[$i++] = "piv"; $value{"piv"} = "982"; -$entity[$i++] = "bull"; $value{"bull"} = "8226"; -$entity[$i++] = "hellip"; $value{"hellip"} = "8230"; -$entity[$i++] = "prime"; 
$value{"prime"} = "8242"; -$entity[$i++] = "Prime"; $value{"Prime"} = "8243"; -$entity[$i++] = "oline"; $value{"oline"} = "8254"; -$entity[$i++] = "frasl"; $value{"frasl"} = "8260"; -$entity[$i++] = "weierp"; $value{"weierp"} = "8472"; -$entity[$i++] = "image"; $value{"image"} = "8465"; -$entity[$i++] = "real"; $value{"real"} = "8476"; -$entity[$i++] = "trade"; $value{"trade"} = "8482"; -$entity[$i++] = "alefsym"; $value{"alefsym"} = "8501"; -$entity[$i++] = "larr"; $value{"larr"} = "8592"; -$entity[$i++] = "uarr"; $value{"uarr"} = "8593"; -$entity[$i++] = "rarr"; $value{"rarr"} = "8594"; -$entity[$i++] = "darr"; $value{"darr"} = "8595"; -$entity[$i++] = "harr"; $value{"harr"} = "8596"; -$entity[$i++] = "crarr"; $value{"crarr"} = "8629"; -$entity[$i++] = "lArr"; $value{"lArr"} = "8656"; -$entity[$i++] = "uArr"; $value{"uArr"} = "8657"; -$entity[$i++] = "rArr"; $value{"rArr"} = "8658"; -$entity[$i++] = "dArr"; $value{"dArr"} = "8659"; -$entity[$i++] = "hArr"; $value{"hArr"} = "8660"; -$entity[$i++] = "forall"; $value{"forall"} = "8704"; -$entity[$i++] = "part"; $value{"part"} = "8706"; -$entity[$i++] = "exist"; $value{"exist"} = "8707"; -$entity[$i++] = "empty"; $value{"empty"} = "8709"; -$entity[$i++] = "nabla"; $value{"nabla"} = "8711"; -$entity[$i++] = "isin"; $value{"isin"} = "8712"; -$entity[$i++] = "notin"; $value{"notin"} = "8713"; -$entity[$i++] = "ni"; $value{"ni"} = "8715"; -$entity[$i++] = "prod"; $value{"prod"} = "8719"; -$entity[$i++] = "sum"; $value{"sum"} = "8721"; -$entity[$i++] = "minus"; $value{"minus"} = "8722"; -$entity[$i++] = "lowast"; $value{"lowast"} = "8727"; -$entity[$i++] = "radic"; $value{"radic"} = "8730"; -$entity[$i++] = "prop"; $value{"prop"} = "8733"; -$entity[$i++] = "infin"; $value{"infin"} = "8734"; -$entity[$i++] = "ang"; $value{"ang"} = "8736"; -$entity[$i++] = "and"; $value{"and"} = "8743"; -$entity[$i++] = "or"; $value{"or"} = "8744"; -$entity[$i++] = "cap"; $value{"cap"} = "8745"; -$entity[$i++] = "cup"; $value{"cup"} = 
"8746"; -$entity[$i++] = "int"; $value{"int"} = "8747"; -$entity[$i++] = "there4"; $value{"there4"} = "8756"; -$entity[$i++] = "sim"; $value{"sim"} = "8764"; -$entity[$i++] = "cong"; $value{"cong"} = "8773"; -$entity[$i++] = "asymp"; $value{"asymp"} = "8776"; -$entity[$i++] = "ne"; $value{"ne"} = "8800"; -$entity[$i++] = "equiv"; $value{"equiv"} = "8801"; -$entity[$i++] = "le"; $value{"le"} = "8804"; -$entity[$i++] = "ge"; $value{"ge"} = "8805"; -$entity[$i++] = "sub"; $value{"sub"} = "8834"; -$entity[$i++] = "sup"; $value{"sup"} = "8835"; -$entity[$i++] = "nsub"; $value{"nsub"} = "8836"; -$entity[$i++] = "sube"; $value{"sube"} = "8838"; -$entity[$i++] = "supe"; $value{"supe"} = "8839"; -$entity[$i++] = "oplus"; $value{"oplus"} = "8853"; -$entity[$i++] = "otimes"; $value{"otimes"} = "8855"; -$entity[$i++] = "perp"; $value{"perp"} = "8869"; -$entity[$i++] = "sdot"; $value{"sdot"} = "8901"; -$entity[$i++] = "lceil"; $value{"lceil"} = "8968"; -$entity[$i++] = "rceil"; $value{"rceil"} = "8969"; -$entity[$i++] = "lfloor"; $value{"lfloor"} = "8970"; -$entity[$i++] = "rfloor"; $value{"rfloor"} = "8971"; -$entity[$i++] = "lang"; $value{"lang"} = "9001"; -$entity[$i++] = "rang"; $value{"rang"} = "9002"; -$entity[$i++] = "loz"; $value{"loz"} = "9674"; -$entity[$i++] = "spades"; $value{"spades"} = "9824"; -$entity[$i++] = "clubs"; $value{"clubs"} = "9827"; -$entity[$i++] = "hearts"; $value{"hearts"} = "9829"; -$entity[$i++] = "diams"; $value{"diams"} = "9830"; - -# Markup-significant and internationalization characters -# See the HTML4.0 spec for this list in it's DTD form -$entity[$i++] = "quot"; $value{"quot"} = "34"; -$entity[$i++] = "amp"; $value{"amp"} = "38"; -$entity[$i++] = "lt"; $value{"lt"} = "60"; -$entity[$i++] = "gt"; $value{"gt"} = "62"; -$entity[$i++] = "OElig"; $value{"OElig"} = "338"; -$entity[$i++] = "oelig"; $value{"oelig"} = "339"; -$entity[$i++] = "Scaron"; $value{"Scaron"} = "352"; -$entity[$i++] = "scaron"; $value{"scaron"} = "353"; -$entity[$i++] = 
"Yuml"; $value{"Yuml"} = "376"; -$entity[$i++] = "circ"; $value{"circ"} = "710"; -$entity[$i++] = "tilde"; $value{"tilde"} = "732"; -$entity[$i++] = "ensp"; $value{"ensp"} = "8194"; -$entity[$i++] = "emsp"; $value{"emsp"} = "8195"; -$entity[$i++] = "thinsp"; $value{"thinsp"} = "8201"; -$entity[$i++] = "zwnj"; $value{"zwnj"} = "8204"; -$entity[$i++] = "zwj"; $value{"zwj"} = "8205"; -$entity[$i++] = "lrm"; $value{"lrm"} = "8206"; -$entity[$i++] = "rlm"; $value{"rlm"} = "8207"; -$entity[$i++] = "ndash"; $value{"ndash"} = "8211"; -$entity[$i++] = "mdash"; $value{"mdash"} = "8212"; -$entity[$i++] = "lsquo"; $value{"lsquo"} = "8216"; -$entity[$i++] = "rsquo"; $value{"rsquo"} = "8217"; -$entity[$i++] = "sbquo"; $value{"sbquo"} = "8218"; -$entity[$i++] = "ldquo"; $value{"ldquo"} = "8220"; -$entity[$i++] = "rdquo"; $value{"rdquo"} = "8221"; -$entity[$i++] = "bdquo"; $value{"bdquo"} = "8222"; -$entity[$i++] = "dagger"; $value{"dagger"} = "8224"; -$entity[$i++] = "Dagger"; $value{"Dagger"} = "8225"; -$entity[$i++] = "permil"; $value{"permil"} = "8240"; -$entity[$i++] = "lsaquo"; $value{"lsaquo"} = "8249"; -$entity[$i++] = "rsaquo"; $value{"rsaquo"} = "8250"; -$entity[$i++] = "euro"; $value{"euro"} = "8364"; - -# Navigator entity extensions -$entity[$i++] = "AMP"; $value{"AMP"} = "38"; -$entity[$i++] = "COPY"; $value{"COPY"} = "169"; -$entity[$i++] = "GT"; $value{"GT"} = "62"; -$entity[$i++] = "LT"; $value{"LT"} = "60"; -$entity[$i++] = "QUOT"; $value{"QUOT"} = "34"; -$entity[$i++] = "REG"; $value{"REG"} = "174"; - -###################################################################### - -# Sort the entity table before using it -@entity = sort @entity; - -$copyright = "/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- - * - * The contents of this file are subject to the Netscape Public License - * Version 1.0 (the \"License\"); you may not use this file except in - * compliance with the License. 
You may obtain a copy of the License at - * http://www.mozilla.org/NPL/ - * - * Software distributed under the License is distributed on an \"AS IS\" - * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See - * the License for the specific language governing rights and limitations - * under the License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is Netscape Communications - * Corporation. Portions created by Netscape are Copyright (C) 1998 - * Netscape Communications Corporation. All Rights Reserved. - */ - -/* Do not edit - generated by genentities.pl */ -"; - -###################################################################### - -$file_base = @ARGV[0]; - -# Generate the source file -open(CPP_FILE, ">$file_base.cpp"); -print CPP_FILE $copyright; -print CPP_FILE "#include \"nsCRT.h\"\n"; -print CPP_FILE "#include \"" . $file_base . ".h\"\n\n"; - -# Print out table of tag names -print CPP_FILE "static struct { char* mEntity; PRInt32 mValue; } entityTable[$i] = {\n "; -$width = 2; -for ($j = 0; $j < $i; $j++) { - $key = $entity[$j]; - $val = $value{$key}; - $str = "{ \"" . $key . "\", " . $val . " }"; - if ($j < $i - 1) { - $str = $str . ", "; - } - $len = length($str); - if ($width + $len > 78) { - print CPP_FILE "\n "; - $width = 2; - } - print CPP_FILE $str; - $width = $width + $len; -} -print CPP_FILE "\n};\n"; -print CPP_FILE "#define NS_HTML_ENTITY_MAX " . $i . "\n"; - -# Finally, dump out the search routine that takes a char* and finds it -# in the table. 
-print CPP_FILE " -PRInt32 NS_EntityToUnicode(const char* aEntity) { - int low = 0; - int high = NS_HTML_ENTITY_MAX - 1; - while (low <= high) { - int middle = (low + high) >> 1; - int result = nsCRT::strcmp(aEntity, entityTable[middle].mEntity); - if (result == 0) - return entityTable[middle].mValue; - if (result < 0) - high = middle - 1; - else - low = middle + 1; - } - return -1; -} - -// XXX - WARNING, slow, we should have -// a much faster routine instead of scanning -// the entire list -const char* NS_UnicodeToEntity(PRInt32 aCode) -{ - for (PRInt32 i = 0; i < NS_HTML_ENTITY_MAX; i++) - { - if (entityTable[i].mValue == aCode) - return entityTable[i].mEntity; - } - return nsnull; -} - - -#ifdef NS_DEBUG -#include - -class nsTestEntityTable { -public: - nsTestEntityTable() { - const char *entity; - PRInt32 value; - - // Make sure we can find everything we are supposed to - for (int i = 0; i < NS_HTML_ENTITY_MAX; i++) { - entity = entityTable[i].mEntity; - value = NS_EntityToUnicode(entity); - NS_ASSERTION(value != -1, \"can't find entity\"); - } - - // Make sure we don't find things that aren't there - value = NS_EntityToUnicode(\"@\"); - NS_ASSERTION(value == -1, \"found @\"); - value = NS_EntityToUnicode(\"zzzzz\"); - NS_ASSERTION(value == -1, \"found zzzzz\"); - } -}; -nsTestEntityTable validateEntityTable; -#endif - -"; - -close(CPP_FILE); diff --git a/htmlparser/tools/gentags.pl b/htmlparser/tools/gentags.pl deleted file mode 100644 index dd18cb992f9b..000000000000 --- a/htmlparser/tools/gentags.pl +++ /dev/null @@ -1,356 +0,0 @@ -#! /usr/local/bin/perl - -# ***** BEGIN LICENSE BLOCK ***** -# Version: MPL 1.1/GPL 2.0/LGPL 2.1 -# -# The contents of this file are subject to the Mozilla Public License Version -# 1.1 (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# http://www.mozilla.org/MPL/ -# -# Software distributed under the License is distributed on an "AS IS" basis, -# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License -# for the specific language governing rights and limitations under the -# License. -# -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# -# Alternatively, the contents of this file may be used under the terms of -# either of the GNU General Public License Version 2 or later (the "GPL"), -# or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), -# in which case the provisions of the GPL or the LGPL are applicable instead -# of those above. If you wish to allow use of your version of this file only -# under the terms of either the GPL or the LGPL, and not to allow others to -# use your version of this file under the terms of the MPL, indicate your -# decision by deleting the provisions above and replace them with the notice -# and other provisions required by the GPL or the LGPL. If you do not delete -# the provisions above, a recipient may use your version of this file under -# the terms of any one of the MPL, the GPL or the LGPL. -# -# ***** END LICENSE BLOCK ***** - -###################################################################### - -# Table of tag names; it doesn't have to be sorted because code -# below will do it. However, for the sake of ease of additions, keep -# it sorted so that its easy to tell where to add a new tag and that -# the tag hasn't already been added. 
-$i = 0; -$tags[$i++] = "a"; -$tags[$i++] = "abbr"; -$tags[$i++] = "acronym"; -$tags[$i++] = "address"; -$tags[$i++] = "applet"; -$tags[$i++] = "area"; -$tags[$i++] = "b"; -$tags[$i++] = "base"; -$tags[$i++] = "basefont"; -$tags[$i++] = "bdo"; -$tags[$i++] = "bgsound"; -$tags[$i++] = "big"; -$tags[$i++] = "blink"; -$tags[$i++] = "blockquote"; -$tags[$i++] = "body"; -$tags[$i++] = "br"; -$tags[$i++] = "button"; -$tags[$i++] = "caption"; -$tags[$i++] = "center"; -$tags[$i++] = "cite"; -$tags[$i++] = "code"; -$tags[$i++] = "col"; -$tags[$i++] = "colgroup"; -$tags[$i++] = "dd"; -$tags[$i++] = "del"; -$tags[$i++] = "dfn"; -$tags[$i++] = "dir"; -$tags[$i++] = "div"; -$tags[$i++] = "dl"; -$tags[$i++] = "dt"; -$tags[$i++] = "em"; -$tags[$i++] = "embed"; -$tags[$i++] = "endnote"; -$tags[$i++] = "fieldset"; -$tags[$i++] = "font"; -$tags[$i++] = "form"; -$tags[$i++] = "frame"; -$tags[$i++] = "frameset"; -$tags[$i++] = "h1"; -$tags[$i++] = "h2"; -$tags[$i++] = "h3"; -$tags[$i++] = "h4"; -$tags[$i++] = "h5"; -$tags[$i++] = "h6"; -$tags[$i++] = "head"; -$tags[$i++] = "hr"; -$tags[$i++] = "html"; -$tags[$i++] = "i"; -$tags[$i++] = "iframe"; -$tags[$i++] = "ilayer"; -$tags[$i++] = "image"; -$tags[$i++] = "img"; -$tags[$i++] = "input"; -$tags[$i++] = "ins"; -$tags[$i++] = "isindex"; -$tags[$i++] = "kbd"; -$tags[$i++] = "keygen"; -$tags[$i++] = "label"; -$tags[$i++] = "layer"; -$tags[$i++] = "legend"; -$tags[$i++] = "li"; -$tags[$i++] = "link"; -$tags[$i++] = "listing"; -$tags[$i++] = "map"; -$tags[$i++] = "menu"; -$tags[$i++] = "meta"; -$tags[$i++] = "multicol"; -$tags[$i++] = "nobr"; -$tags[$i++] = "noembed"; -$tags[$i++] = "noframes"; -$tags[$i++] = "nolayer"; -$tags[$i++] = "noscript"; -$tags[$i++] = "object"; -$tags[$i++] = "ol"; -$tags[$i++] = "optgroup"; -$tags[$i++] = "option"; -$tags[$i++] = "p"; -$tags[$i++] = "param"; -$tags[$i++] = "parsererror"; -$tags[$i++] = "plaintext"; -$tags[$i++] = "pre"; -$tags[$i++] = "q"; -$tags[$i++] = "s"; -$tags[$i++] = "samp"; -$tags[$i++] 
= "script"; -$tags[$i++] = "select"; -$tags[$i++] = "server"; -$tags[$i++] = "small"; -$tags[$i++] = "sound"; -$tags[$i++] = "sourcetext"; -$tags[$i++] = "spacer"; -$tags[$i++] = "span"; -$tags[$i++] = "strike"; -$tags[$i++] = "strong"; -$tags[$i++] = "style"; -$tags[$i++] = "sub"; -$tags[$i++] = "sup"; -$tags[$i++] = "table"; -$tags[$i++] = "tbody"; -$tags[$i++] = "td"; -$tags[$i++] = "textarea"; -$tags[$i++] = "tfoot"; -$tags[$i++] = "th"; -$tags[$i++] = "thead"; -$tags[$i++] = "title"; -$tags[$i++] = "tr"; -$tags[$i++] = "tt"; -$tags[$i++] = "u"; -$tags[$i++] = "ul"; -$tags[$i++] = "var"; -$tags[$i++] = "wbr"; -$tags[$i++] = "xmp"; - -###################################################################### - -# These are not tags; rather they are extra values to place into the -# tag enumeration after the normal tags. These do not need to be sorted -# and they do not go into the tag table, just into the tag enumeration. -$extra = 0; -$extra_tags[$extra++] = "text"; -$extra_tags[$extra++] = "whitespace"; -$extra_tags[$extra++] = "newline"; -$extra_tags[$extra++] = "comment"; -$extra_tags[$extra++] = "entity"; -$extra_tags[$extra++] = "userdefined"; -$extra_tags[$extra++] = "secret_h1style"; -$extra_tags[$extra++] = "secret_h2style"; -$extra_tags[$extra++] = "secret_h3style"; -$extra_tags[$extra++] = "secret_h4style"; -$extra_tags[$extra++] = "secret_h5style"; -$extra_tags[$extra++] = "secret_h6style"; - -###################################################################### - -# Sort the tag table before using it -@tags = sort @tags; - -$copyright = "/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- - * - * The contents of this file are subject to the Netscape Public License - * Version 1.0 (the \"License\"); you may not use this file except in - * compliance with the License. 
You may obtain a copy of the License at - * http://www.mozilla.org/NPL/ - * - * Software distributed under the License is distributed on an \"AS IS\" - * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See - * the License for the specific language governing rights and limitations - * under the License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is Netscape Communications - * Corporation. Portions created by Netscape are Copyright (C) 1998 - * Netscape Communications Corporation. All Rights Reserved. - */ - -/* Do not edit - generated by gentags.pl */ -"; - -###################################################################### - -$file_base = @ARGV[0]; - -# Generate the header file first -open(HEADER_FILE, ">$file_base.h"); - -# Print out copyright and do not edit notice -print HEADER_FILE $copyright; -print HEADER_FILE "#ifndef " . $file_base . "_h___\n"; -print HEADER_FILE "#define " . $file_base . "_h___\n"; - -# Print out enum's for the tag symbols -print HEADER_FILE "enum nsHTMLTag {\n"; -print HEADER_FILE " /* this enum must be first and must be zero */\n"; -print HEADER_FILE " eHTMLTag_unknown=0,\n\n"; -print HEADER_FILE " /* begin tag enums */\n "; -$width = 2; -print HEADER_FILE $str; -for ($j = 0; $j < $i; $j++) { - $lower = $tags[$j]; - $lower =~ tr/A-Z/a-z/; - $str = "eHTMLTag_" . $lower . "=" . ($j + 1); - $str = $str . ", "; - $len = length($str); - if ($width + $len > 78) { - print HEADER_FILE "\n "; - $width = 2; - } - print HEADER_FILE $str; - $width = $width + $len; -} -print HEADER_FILE "\n\n /* The remaining enums are not for tags */\n "; - -# Print out extra enum's that are not in the tag table -$width = 2; -for ($k = 0; $k < $extra; $k++) { - $lower = $extra_tags[$k]; - $lower =~ tr/A-Z/a-z/; - $str = "eHTMLTag_" . $lower . "=" . ($j + $k + 1); - if ($k < $extra - 1) { - $str = $str . 
", "; - } - $len = length($str); - if ($width + $len > 78) { - print HEADER_FILE "\n "; - $width = 2; - } - print HEADER_FILE $str; - $width = $width + $len; -} - -print HEADER_FILE "\n};\n#define NS_HTML_TAG_MAX " . $j . "\n\n"; -print HEADER_FILE - "extern nsHTMLTag NS_TagToEnum(const char* aTag);\n"; -print HEADER_FILE - "extern const char* NS_EnumToTag(nsHTMLTag aEnum);\n\n"; -print HEADER_FILE "#endif /* " . $file_base . "_h___ */\n"; -close(HEADER_FILE); - -###################################################################### - -# Generate the source file -open(CPP_FILE, ">$file_base.cpp"); -print CPP_FILE $copyright; -print CPP_FILE "#include \"nsCRT.h\"\n"; -print CPP_FILE "#include \"$file_base.h\"\n\n"; - -# Print out table of tag names -print CPP_FILE "static char* tagTable[] = {\n "; -$width = 2; -for ($j = 0; $j < $i; $j++) { - $lower = $tags[$j]; - $lower =~ tr/A-Z/a-z/; - $str = "\"" . $lower . "\""; - if ($j < $i - 1) { - $str = $str . ", "; - } - $len = length($str); - if ($width + $len > 78) { - print CPP_FILE "\n "; - $width = 2; - } - print CPP_FILE $str; - $width = $width + $len; -} -print CPP_FILE "\n};\n"; - -# Finally, dump out the search routine that takes a char* and finds it -# in the table. 
-print CPP_FILE " -nsHTMLTag NS_TagToEnum(const char* aTagName) { - int low = 0; - int high = NS_HTML_TAG_MAX - 1; - while (low <= high) { - int middle = (low + high) >> 1; - int result = nsCRT::strcasecmp(aTagName, tagTable[middle]); - if (result == 0) - return (nsHTMLTag) (middle + 1); - if (result < 0) - high = middle - 1; - else - low = middle + 1; - } - return eHTMLTag_userdefined; -} - -const char* NS_EnumToTag(nsHTMLTag aTagID) { - if ((int(aTagID) <= 0) || (int(aTagID) > NS_HTML_TAG_MAX)) { - return 0; - } - return tagTable[int(aTagID) - 1]; -} - -#ifdef NS_DEBUG -#include - -class nsTestTagTable { -public: - nsTestTagTable() { - const char *tag; - nsHTMLTag id; - - // Make sure we can find everything we are supposed to - for (int i = 0; i < NS_HTML_TAG_MAX; i++) { - tag = tagTable[i]; - id = NS_TagToEnum(tag); - NS_ASSERTION(id != eHTMLTag_userdefined, \"can't find tag id\"); - const char* check = NS_EnumToTag(id); - NS_ASSERTION(check == tag, \"can't map id back to tag\"); - } - - // Make sure we don't find things that aren't there - id = NS_TagToEnum(\"@\"); - NS_ASSERTION(id == eHTMLTag_userdefined, \"found @\"); - id = NS_TagToEnum(\"zzzzz\"); - NS_ASSERTION(id == eHTMLTag_userdefined, \"found zzzzz\"); - - tag = NS_EnumToTag((nsHTMLTag) 0); - NS_ASSERTION(0 == tag, \"found enum 0\"); - tag = NS_EnumToTag((nsHTMLTag) -1); - NS_ASSERTION(0 == tag, \"found enum -1\"); - tag = NS_EnumToTag((nsHTMLTag) (NS_HTML_TAG_MAX + 1)); - NS_ASSERTION(0 == tag, \"found past max enum\"); - } -}; -nsTestTagTable validateTagTable; -#endif - -"; - -close(CPP_FILE);