added runtime 'marker-style' entities and fixed a few bugs.

This commit is contained in:
rickg 1998-04-16 07:22:40 +00:00
Родитель 3ced7a3750
Коммит 880799b4e1
12 изменённых файлов: 70 добавлений и 38 удалений

Просмотреть файл

@ -301,6 +301,7 @@ PRBool nsHTMLParser::Parse(nsIURL* aURL){
mTokenizer=new CTokenizer(aURL, delegate);
mTokenizer->Tokenize();
#define VERBOSE_DEBUG
#ifdef VERBOSE_DEBUG
mTokenizer->DebugDumpTokens(cout);
#endif

Просмотреть файл

@ -14,16 +14,23 @@
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
*/
#include <ctype.h>
#include <ctype.h>
#include <time.h>
#include <stdio.h>
#include "nsScanner.h"
#include "nsToken.h"
#include "nsToken.h"
#include "nsHTMLTokens.h"
#include "nsParserTypes.h"
#include "prtypes.h"
#include "nsDebug.h"
//#define GESS_MACHINE
#ifdef GESS_MACHINE
#include "nsEntityEx.cpp"
#endif
static nsString gIdentChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-");
static nsString gAttrTextChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-%.");
static nsString gAlphaChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
@ -41,6 +48,7 @@ struct StrToUnicodeStruct
PRInt32 fValue;
};
// KEEP THIS LIST SORTED!
// NOTE: This names table is sorted in ASCII collating order. If you
// add a new entry, make sure you put it in the right spot; otherwise
@ -86,6 +94,7 @@ static StrToUnicodeStruct gStrToUnicodeTable[] =
};
struct HTMLTagEntry
{
char fName[12];
@ -1016,15 +1025,21 @@ PRInt32 CEntityToken::ConsumeEntity(PRUnichar aChar,nsString& aString,CScanner*
* @param
* @return
*------------------------------------------------------*/
PRInt32 CEntityToken::TranslateToUnicode(void) {
PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString) {
char* cp = mTextValue.ToNewCString();
PRInt32 index=FindEntityIndex(cp);
if(kNotFound!=index) {
PRUnichar ch=gStrToUnicodeTable[index].fValue;
aString=ch;
} else {
#ifdef GESS_MACHINE
index=TranslateExtendedEntity(cp,aString);
#endif
}
delete cp;
if(kNotFound==index)
return kNotFound;
return gStrToUnicodeTable[index].fValue;
return index;
}
/**-------------------------------------------------------
* This method ensures that the entity table doesn't get
@ -1033,8 +1048,7 @@ PRInt32 CEntityToken::TranslateToUnicode(void) {
* @update gess 3/25/98
* @return PR_TRUE if valid (ordered correctly)
*------------------------------------------------------*/
PRBool CEntityToken::VerifyEntityTable()
{
PRBool CEntityToken::VerifyEntityTable(){
PRInt32 count=sizeof(gStrToUnicodeTable)/sizeof(StrToUnicodeStruct);
PRInt32 i,j;
for(i=1;i<count-1;i++)
@ -1057,11 +1071,11 @@ PRBool CEntityToken::VerifyEntityTable()
* @return integer offset of string in table, or kNotFound
*------------------------------------------------------*/
PRInt32 CEntityToken::FindEntityIndex(const char* aBuffer,PRInt32 aBufLen) {
PRInt32 result=-1;
PRInt32 result=kNotFound;
PRInt32 cnt=sizeof(gStrToUnicodeTable)/sizeof(StrToUnicodeStruct);
PRInt32 low=0;
PRInt32 high=cnt-1;
PRInt32 middle=-1;
PRInt32 middle=kNotFound;
if(kNotFound==aBufLen) {
aBufLen=strlen(aBuffer);
@ -1071,7 +1085,8 @@ PRInt32 CEntityToken::FindEntityIndex(const char* aBuffer,PRInt32 aBufLen) {
while(low<=high)
{
middle=(PRInt32)(low+high)/2;
result=strncmp(aBuffer,gStrToUnicodeTable[middle].fName,aBufLen);
// result=strncmp(aBuffer,gStrToUnicodeTable[middle].fName,aBufLen);
result=strcmp(aBuffer,gStrToUnicodeTable[middle].fName);
if (result==0) {
return middle;
}

Просмотреть файл

@ -201,10 +201,10 @@ class CEntityToken : public CHTMLToken {
CEntityToken(const nsString& aString);
virtual const char* GetClassName(void);
virtual PRInt32 GetTokenType(void);
PRInt32 TranslateToUnicode(void);
PRInt32 TranslateToUnicodeStr(nsString& aString);
virtual PRInt32 Consume(PRUnichar aChar,CScanner* aScanner);
static PRInt32 ConsumeEntity(PRUnichar aChar,nsString& aString,CScanner* aScanner);
static PRInt32 TranslateToUnicode(nsString& aString);
static PRInt32 TranslateToUnicodeStr(PRInt32 aValue,nsString& aString);
static PRInt32 FindEntityIndex(const char* aBuffer,PRInt32 aBufLen=-1);
static PRBool VerifyEntityTable(void);
static PRInt32 ReduceEntities(nsString& aString);

Просмотреть файл

@ -67,7 +67,7 @@ class nsIParserNode {
// When the node is an entity, this will translate the entity to
// its Unicode value.
virtual PRInt32 TranslateToUnicode() const = 0;
virtual PRInt32 TranslateToUnicodeStr(nsString& aString) const = 0;
};
#endif

Просмотреть файл

@ -194,10 +194,10 @@ const nsString& nsCParserNode::GetValueAt(PRInt32 anIndex) const {
}
PRInt32 nsCParserNode::TranslateToUnicode() const
PRInt32 nsCParserNode::TranslateToUnicodeStr(nsString& aString) const
{
if (eToken_entity == mToken->GetTokenType()) {
return ((CEntityToken*)mToken)->TranslateToUnicode();
return ((CEntityToken*)mToken)->TranslateToUnicodeStr(aString);
}
return -1;
}

Просмотреть файл

@ -65,7 +65,7 @@ class nsCParserNode : public nsIParserNode {
virtual void SetSkippedContent(CHTMLToken* aToken);
// misc
virtual PRInt32 TranslateToUnicode() const;
virtual PRInt32 TranslateToUnicodeStr(nsString& aString) const;
protected:
PRInt32 mAttributeCount;

Просмотреть файл

@ -301,6 +301,7 @@ PRBool nsHTMLParser::Parse(nsIURL* aURL){
mTokenizer=new CTokenizer(aURL, delegate);
mTokenizer->Tokenize();
#define VERBOSE_DEBUG
#ifdef VERBOSE_DEBUG
mTokenizer->DebugDumpTokens(cout);
#endif

Просмотреть файл

@ -14,16 +14,23 @@
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
*/
#include <ctype.h>
#include <ctype.h>
#include <time.h>
#include <stdio.h>
#include "nsScanner.h"
#include "nsToken.h"
#include "nsToken.h"
#include "nsHTMLTokens.h"
#include "nsParserTypes.h"
#include "prtypes.h"
#include "nsDebug.h"
//#define GESS_MACHINE
#ifdef GESS_MACHINE
#include "nsEntityEx.cpp"
#endif
static nsString gIdentChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-");
static nsString gAttrTextChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-%.");
static nsString gAlphaChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
@ -41,6 +48,7 @@ struct StrToUnicodeStruct
PRInt32 fValue;
};
// KEEP THIS LIST SORTED!
// NOTE: This names table is sorted in ASCII collating order. If you
// add a new entry, make sure you put it in the right spot; otherwise
@ -86,6 +94,7 @@ static StrToUnicodeStruct gStrToUnicodeTable[] =
};
struct HTMLTagEntry
{
char fName[12];
@ -1016,15 +1025,21 @@ PRInt32 CEntityToken::ConsumeEntity(PRUnichar aChar,nsString& aString,CScanner*
* @param
* @return
*------------------------------------------------------*/
PRInt32 CEntityToken::TranslateToUnicode(void) {
PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString) {
char* cp = mTextValue.ToNewCString();
PRInt32 index=FindEntityIndex(cp);
if(kNotFound!=index) {
PRUnichar ch=gStrToUnicodeTable[index].fValue;
aString=ch;
} else {
#ifdef GESS_MACHINE
index=TranslateExtendedEntity(cp,aString);
#endif
}
delete cp;
if(kNotFound==index)
return kNotFound;
return gStrToUnicodeTable[index].fValue;
return index;
}
/**-------------------------------------------------------
* This method ensures that the entity table doesn't get
@ -1033,8 +1048,7 @@ PRInt32 CEntityToken::TranslateToUnicode(void) {
* @update gess 3/25/98
* @return PR_TRUE if valid (ordered correctly)
*------------------------------------------------------*/
PRBool CEntityToken::VerifyEntityTable()
{
PRBool CEntityToken::VerifyEntityTable(){
PRInt32 count=sizeof(gStrToUnicodeTable)/sizeof(StrToUnicodeStruct);
PRInt32 i,j;
for(i=1;i<count-1;i++)
@ -1057,11 +1071,11 @@ PRBool CEntityToken::VerifyEntityTable()
* @return integer offset of string in table, or kNotFound
*------------------------------------------------------*/
PRInt32 CEntityToken::FindEntityIndex(const char* aBuffer,PRInt32 aBufLen) {
PRInt32 result=-1;
PRInt32 result=kNotFound;
PRInt32 cnt=sizeof(gStrToUnicodeTable)/sizeof(StrToUnicodeStruct);
PRInt32 low=0;
PRInt32 high=cnt-1;
PRInt32 middle=-1;
PRInt32 middle=kNotFound;
if(kNotFound==aBufLen) {
aBufLen=strlen(aBuffer);
@ -1071,7 +1085,8 @@ PRInt32 CEntityToken::FindEntityIndex(const char* aBuffer,PRInt32 aBufLen) {
while(low<=high)
{
middle=(PRInt32)(low+high)/2;
result=strncmp(aBuffer,gStrToUnicodeTable[middle].fName,aBufLen);
// result=strncmp(aBuffer,gStrToUnicodeTable[middle].fName,aBufLen);
result=strcmp(aBuffer,gStrToUnicodeTable[middle].fName);
if (result==0) {
return middle;
}

Просмотреть файл

@ -201,10 +201,10 @@ class CEntityToken : public CHTMLToken {
CEntityToken(const nsString& aString);
virtual const char* GetClassName(void);
virtual PRInt32 GetTokenType(void);
PRInt32 TranslateToUnicode(void);
PRInt32 TranslateToUnicodeStr(nsString& aString);
virtual PRInt32 Consume(PRUnichar aChar,CScanner* aScanner);
static PRInt32 ConsumeEntity(PRUnichar aChar,nsString& aString,CScanner* aScanner);
static PRInt32 TranslateToUnicode(nsString& aString);
static PRInt32 TranslateToUnicodeStr(PRInt32 aValue,nsString& aString);
static PRInt32 FindEntityIndex(const char* aBuffer,PRInt32 aBufLen=-1);
static PRBool VerifyEntityTable(void);
static PRInt32 ReduceEntities(nsString& aString);

Просмотреть файл

@ -67,7 +67,7 @@ class nsIParserNode {
// When the node is an entity, this will translate the entity to
// its Unicode value.
virtual PRInt32 TranslateToUnicode() const = 0;
virtual PRInt32 TranslateToUnicodeStr(nsString& aString) const = 0;
};
#endif

Просмотреть файл

@ -194,10 +194,10 @@ const nsString& nsCParserNode::GetValueAt(PRInt32 anIndex) const {
}
PRInt32 nsCParserNode::TranslateToUnicode() const
PRInt32 nsCParserNode::TranslateToUnicodeStr(nsString& aString) const
{
if (eToken_entity == mToken->GetTokenType()) {
return ((CEntityToken*)mToken)->TranslateToUnicode();
return ((CEntityToken*)mToken)->TranslateToUnicodeStr(aString);
}
return -1;
}

Просмотреть файл

@ -65,7 +65,7 @@ class nsCParserNode : public nsIParserNode {
virtual void SetSkippedContent(CHTMLToken* aToken);
// misc
virtual PRInt32 TranslateToUnicode() const;
virtual PRInt32 TranslateToUnicodeStr(nsString& aString) const;
protected:
PRInt32 mAttributeCount;